diff --git "a/measurement.json" "b/measurement.json" new file mode 100644--- /dev/null +++ "b/measurement.json" @@ -0,0 +1,110886 @@ +{ + "measurement": [ + { + "key": "model.layers.0.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.011352024972438812, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.009680473245680332, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.0048089963383972645, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.005056488327682018, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.005056346766650677, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0019768390338867903, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.010821578092873096, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.009598405100405216, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.005326221231371164, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.004667424131184816, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.004888031631708145, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.005064724944531918, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.004665672313421965, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.002734588924795389, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.0020720285829156637, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0026797461323440075, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.0018711868906393647, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.0015915163094177842, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.0018210166599601507, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.001525377039797604, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.0017634177347645164, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.0018209450645372272, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.0013771118829026818, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0015064894687384367, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.011477116495370865, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.009806735441088676, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.00481945276260376, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.005029837600886822, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.005029535852372646, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.0018476091790944338, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.011210514232516289, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.009697568602859974, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.005294399335980415, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0046087596565485, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.004823548719286919, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.005018086638301611, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.004606103990226984, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.0026477943174540997, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.001909355167299509, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.0026149044279009104, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.0016702150460332632, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.0013660760596394539, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0016092879232019186, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.001284329337067902, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0016108703566715121, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0016089041018858552, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.001200815662741661, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0012598525499925017, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.12164101004600525, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07268531620502472, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.04472779855132103, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.05028523877263069, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.05024697631597519, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.02228897623717785, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.08176479488611221, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.06680021435022354, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.057345662266016006, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03245504945516586, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.0391223318874836, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.04429128021001816, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.032121043652296066, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.024523740634322166, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.022321049124002457, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.02255965769290924, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.012986283749341965, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.01198556087911129, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009667729027569294, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.007956093177199364, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.011573879979550838, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.009607338346540928, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.006684723310172558, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.006263769697397947, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.12050135433673859, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.07692547142505646, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.05209676921367645, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.05155877396464348, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.04968860000371933, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.02585512027144432, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.0750856101512909, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06706611067056656, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05713413655757904, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.03329601511359215, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.035937584936618805, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03840020298957825, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.03240147978067398, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.024816393852233887, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.022690005600452423, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.019471196457743645, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.01435916405171156, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.013585555367171764, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011716385371983051, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.010385453701019287, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01084844209253788, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01159533578902483, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.008099192753434181, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.009136220440268517, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.12018295377492905, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.11188331246376038, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.10940990597009659, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.09958380460739136, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.05412745475769043, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.05168517678976059, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0603720024228096, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.05571865290403366, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.05488007888197899, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.04929831624031067, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.04710361734032631, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.030690517276525497, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.02674810402095318, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.026078365743160248, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.02591596357524395, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.015444023534655571, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.014048016630113125, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.013985083438456059, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.013173690997064114, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.0130838667973876, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.00862103421241045, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.009375376626849174, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.008379403501749039, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.007251635193824768, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.140920028090477, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.13286074995994568, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1304207146167755, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.11882374435663223, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.06389826536178589, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.06152312830090523, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07069004327058792, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.06532900780439377, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.06457448750734329, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.05856705456972122, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.05585096403956413, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.035677094012498856, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.030962975695729256, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03035368211567402, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.030207961797714233, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.017784466966986656, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.015556080266833305, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.015495171770453453, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.0145175876095891, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.014431950636208057, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.009326725266873837, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.00929609127342701, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.009082534350454807, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.006116670090705156, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.07728518545627594, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.06622051447629929, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.06065775454044342, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.05363195016980171, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.034406814724206924, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.029768146574497223, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.04711940884590149, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.039756808429956436, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.03592579439282417, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.028620878234505653, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.027379769831895828, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.022643176838755608, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.019062913954257965, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.016931142657995224, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.016410956159234047, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.011618801392614841, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.009578116238117218, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.009388155303895473, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.008573509752750397, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.008279979228973389, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.006941701285541058, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.00710683036595583, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.006325129419565201, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.00576463807374239, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.021290522068738937, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.013978807255625725, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.007972966879606247, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.008445339277386665, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.008191565051674843, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0035750020761042833, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.014577334746718407, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.013177837245166302, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.009961283765733242, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.006429423112422228, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.006896783132106066, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.007362013682723045, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.006293320097029209, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.004118421580642462, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.003423314541578293, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0037102773785591125, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.002387125976383686, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.002111184410750866, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.002094675088301301, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.0016805602936074138, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.0020248815417289734, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.002073715440928936, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.0012741395039483905, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.00152208236977458, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.018556412309408188, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.012426783330738544, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.006798212882131338, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.007217010948807001, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.007029475178569555, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.0028609551955014467, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.013335051946341991, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.011869557201862335, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.00860669743269682, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0057399701327085495, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.006210125517100096, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.006655463483184576, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.0056465258821845055, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.003518429584801197, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.002808947116136551, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.003351694904267788, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.0020123403519392014, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.0017264707712456584, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0017941228579729795, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.0013704003067687154, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0017892946489155293, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.001779327285476029, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0010258747497573495, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0012496704002842307, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.1441131979227066, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09398606419563293, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06791387498378754, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.0646025538444519, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.05909544974565506, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.034234363585710526, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.08650432527065277, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.07837089896202087, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.06856917589902878, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04100105166435242, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.042043279856443405, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.04417416453361511, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.03744591772556305, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.02874249406158924, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.026288291439414024, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.02206733077764511, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.015211602672934532, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.01419578492641449, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.011766412295401096, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.009934064000844955, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.011440247297286987, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.011143182404339314, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.007753458805382252, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.007240150589495897, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.15842588245868683, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1297897845506668, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.11734788864850998, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.0966697633266449, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.07180994749069214, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.059597767889499664, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.09231335669755936, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.08195193111896515, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.07523880153894424, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.05427803844213486, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.050827424973249435, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.04740087315440178, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.03997904434800148, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.03548871725797653, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.03436809033155441, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.024045076221227646, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.019890425726771355, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.019387496635317802, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.016811560839414597, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.016091590747237206, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.013453047722578049, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.014625228941440582, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.011755725368857384, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011657373048365116, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.16895882785320282, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15936371684074402, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1565893292427063, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1417706459760666, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07833106070756912, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07519225031137466, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08710457384586334, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07995675504207611, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07917279005050659, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07117126137018204, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06784316152334213, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.044656604528427124, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.038842104375362396, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03812364861369133, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.0379607267677784, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02264849655330181, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.020925436168909073, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02086811698973179, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.019643325358629227, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01954091526567936, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013020351529121399, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.014378130435943604, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.012741032056510448, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.0115517508238554, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.20468579232692719, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.19365017116069794, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.19038468599319458, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.17268604040145874, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.09519728273153305, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.09157873690128326, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1051694005727768, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09697864204645157, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.09611844271421432, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.08662689477205276, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.08226421475410461, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05373553931713104, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.046676766127347946, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04590893164277077, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04573051258921623, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02691747061908245, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.024487722665071487, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02440100722014904, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.022863047197461128, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.022751584649086, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.014744129031896591, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.015938114374876022, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.014430893585085869, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01207040250301361, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.018825944513082504, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.018489742651581764, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.0063452767208218575, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.0059749227948486805, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.005320705007761717, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.0030706580728292465, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.01987334154546261, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.018078982830047607, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.005394395906478167, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.005084249656647444, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.005099160596728325, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.004964109044522047, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.004856608342379332, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.002679575700312853, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.0024963589385151863, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.002529558725655079, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.0022813715040683746, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.0009554082062095404, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.002254109364002943, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.000883142405655235, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.002256211591884494, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.002212934661656618, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.0007087035337463021, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.0007682503783144057, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.056835293769836426, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.04380691051483154, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.03688701242208481, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.032942306250333786, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.024295292794704437, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.01816997118294239, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.03393946588039398, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.0308909323066473, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.026788709685206413, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.019042696803808212, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.018714124336838722, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.01720155030488968, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.014718791469931602, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.011782553978264332, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.01099536381661892, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.008604619652032852, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.006257622968405485, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.005914708599448204, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.005253097508102655, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.00471073342487216, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.004480469040572643, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.004487864673137665, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.0033303680829703808, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0030074238311499357, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.05935288593173027, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.04378344863653183, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.03468756750226021, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.0316225029528141, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.024717994034290314, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.017009606584906578, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.03634079173207283, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.033150218427181244, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.027839798480272293, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.019074052572250366, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.01904134638607502, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.018383529037237167, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.015760039910674095, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.012048142962157726, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.011003030464053154, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.009186560288071632, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.006465826649218798, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.006004403345286846, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.00540620693936944, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.004686055239289999, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.004790361505001783, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.004831994883716106, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0033691891003400087, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0032535637728869915, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.17358042299747467, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.14307665824890137, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.13101953268051147, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.11482954025268555, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.07783052325248718, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.06501591205596924, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.09680550545454025, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.08807200938463211, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.08273126929998398, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.06205303966999054, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.05880236625671387, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.04939516261219978, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04212931916117668, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.03747249394655228, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.036343369632959366, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.024703091010451317, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.019355719909071922, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.01885688677430153, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.016158010810613632, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.015347996726632118, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.012870138511061668, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.01243012584745884, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.010861615650355816, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.007931120693683624, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.15713158249855042, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.13950757682323456, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1310299038887024, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.11179966479539871, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.07291098684072495, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.06462997198104858, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.09023643285036087, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.0815173014998436, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.07534634321928024, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.06103591248393059, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.056217487901449203, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.04686693102121353, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.03984704613685608, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.036009352654218674, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.035050179809331894, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.023964527994394302, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.02008920907974243, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.019644679501652718, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01795186474919319, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.017373718321323395, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.013768773525953293, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.014581136405467987, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.01242959126830101, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011616699397563934, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2074214220046997, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.19485162198543549, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.19107136130332947, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1721152812242508, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.09708118438720703, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.0926806852221489, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1077166348695755, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09932506084442139, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.0982850193977356, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0871812030673027, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.08227062225341797, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.054988980293273926, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04766932874917984, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04667359218001366, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04644186794757843, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.027528878301382065, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.024448921903967857, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.024349093437194824, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.022502586245536804, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.022360535338521004, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.014787424355745316, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.015364677645266056, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.014385251328349113, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011005141772329807, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.23901918530464172, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22479309141635895, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.22057420015335083, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.19880031049251556, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11182872951030731, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10686768591403961, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12377151846885681, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11421697586774826, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11313030123710632, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10046112537384033, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.0947113037109375, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06302861869335175, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05458889901638031, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.053504932671785355, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.053248465061187744, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03149763122200966, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.027484230697155, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.027367746457457542, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.025190560147166252, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.025027966126799583, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016611794009804726, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01649293303489685, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.016152942553162575, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010917740873992443, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.1833783984184265, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.16142114996910095, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.15299265086650848, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.1351904571056366, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.08266269415616989, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.07443994283676147, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.098195381462574, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.08961914479732513, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.08542659878730774, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.0700313001871109, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.06620971113443375, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.04983487352728844, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.04288335517048836, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.03978140652179718, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.03901657089591026, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.025020809844136238, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.021029390394687653, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.020740818232297897, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.01862136833369732, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.018131615594029427, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01356035191565752, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.013845539651811123, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.012499025091528893, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.009936812333762646, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.043591853231191635, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.03598157316446304, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.03189978376030922, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.028036782518029213, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.01932024210691452, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.015737367793917656, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.02583092823624611, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.023502925410866737, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.020639758557081223, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.015690242871642113, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.015094771981239319, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.013128438033163548, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.011240771040320396, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.009363315999507904, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.008873318322002888, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0065809860825538635, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.004975882824510336, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.004752149805426598, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.004280640743672848, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.003955692984163761, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.003487490816041827, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.0034929479006677866, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.0027731910813599825, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.002400278113782406, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.04369093477725983, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.03496447950601578, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.029724614694714546, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.02621849626302719, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.018899327144026756, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.014528964646160603, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.02672591619193554, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.024286232888698578, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.0205085352063179, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.01519237644970417, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.014882163144648075, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.013540536165237427, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.011583543382585049, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.009131407365202904, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.008468869142234325, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.006772363558411598, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.004800689872354269, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.004489819053560495, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.004079381003975868, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.003626609919592738, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0035143450368195772, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0034210500307381153, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0025712461210787296, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.002193542430177331, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.17755603790283203, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.14932335913181305, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.13856598734855652, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.12089547514915466, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.08007105439901352, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.06864239275455475, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.09742094576358795, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.08949433267116547, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.08436059951782227, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.06476421654224396, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.06105126440525055, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.04947834834456444, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04270340874791145, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.03841090574860573, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.03733336925506592, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.024724788963794708, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.019707322120666504, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.019243594259023666, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.016604751348495483, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.015856683254241943, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.012734511867165565, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.012384121306240559, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.010932616889476776, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.007682035211473703, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.17003022134304047, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.14862744510173798, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1407172977924347, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.11977653205394745, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.07850959151983261, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.07028254121541977, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.09293251484632492, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.08451274782419205, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.08084447681903839, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.06412072479724884, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.05857551097869873, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.04789144918322563, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04098684713244438, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.038325268775224686, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.037689320743083954, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.024137360975146294, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.02088841423392296, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02061108499765396, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.018225444480776787, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.017834771424531937, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.013396269641816616, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.014438889920711517, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.01240421924740076, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011225007474422455, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.21001894772052765, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.19739344716072083, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.19359083473682404, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.17473381757736206, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.0986238494515419, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.09416545927524567, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10949256271123886, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.1009034737944603, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.09983226656913757, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.08863812685012817, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.08379566669464111, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05590846762061119, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04835934191942215, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04735039174556732, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04710979759693146, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.0279876459389925, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.024640614166855812, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02452816627919674, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.022653548046946526, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.022507354617118835, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.015018763951957226, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.015247777104377747, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.014594373293220997, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010658131912350655, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.24660049378871918, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.23197303712368011, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.22762617468833923, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.2056158185005188, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11586911231279373, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11073262989521027, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12851206958293915, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.1184527799487114, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11730223149061203, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10425141453742981, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09863585233688354, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06562947481870651, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.056691769510507584, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05554908886551857, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.055278319865465164, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03282332420349121, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.028657326474785805, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.028531603515148163, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.026312323287129402, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.026145504787564278, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.017494764178991318, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017356228083372116, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01701011322438717, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011701183393597603, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.20590247213840485, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.18278349936008453, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.17415522038936615, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.15433427691459656, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09340258687734604, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.0847046971321106, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.1098361611366272, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10041368752717972, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.09633670002222061, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07964996248483658, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.07522064447402954, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05577404797077179, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.048046935349702835, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04490366950631142, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04413748160004616, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.027995724231004715, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02359308861196041, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.023316768929362297, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02093905210494995, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02044946514070034, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.015084213577210903, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.015286082401871681, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.014004875905811787, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.01080811582505703, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.060437269508838654, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.05055942386388779, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.045061856508255005, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.03984476253390312, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.02689089998602867, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.022181307896971703, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.036198124289512634, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.03266748785972595, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.028648916631937027, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.02218327485024929, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.021412841975688934, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.018457703292369843, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.01566096767783165, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.01308011170476675, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.012399379163980484, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.009264027699828148, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.007012765854597092, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.006707280408591032, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.006117839366197586, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.0056872800923883915, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.004943256266415119, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.004980314988642931, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.0039525870233774185, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.003526133019477129, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.058287475258111954, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.04740713909268379, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.04117162898182869, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.03653956204652786, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.02537456713616848, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.020145149901509285, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.035367388278245926, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.031857680529356, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.027484076097607613, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.020712506026029587, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.02021002769470215, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.018051570281386375, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.015226179733872414, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.012321199290454388, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.011525025591254234, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.009078750386834145, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.006545473821461201, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.00618763780221343, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.005650598555803299, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.005132547114044428, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.004765789490193129, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.004674083553254604, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0036077473778277636, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.003164788242429495, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.19495868682861328, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1665712296962738, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1560843586921692, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.13705074787139893, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.08828765153884888, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.07715894281864166, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10610000789165497, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09734310954809189, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.09256750345230103, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0727207288146019, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.06840832531452179, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05401286110281944, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04656863212585449, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04238644614815712, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.0413682647049427, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.02699347771704197, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.021792631596326828, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02135753072798252, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01861751824617386, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.017918607220053673, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.013973120599985123, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.013657728210091591, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.012215142138302326, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008637511171400547, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.16123326122760773, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.14313258230686188, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.13641303777694702, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.11663080751895905, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.07461255043745041, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.06742189079523087, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.08889597654342651, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.07986703515052795, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.07668522000312805, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.06171136721968651, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.05746946483850479, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.04585270583629608, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.03888513892889023, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.036509789526462555, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.035939864814281464, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.02324230968952179, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.01996173895895481, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.01971535012125969, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.017588747665286064, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.017235608771443367, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.013039329089224339, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.013858466409146786, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.012149393558502197, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.010843051597476006, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.18162056803703308, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.16986091434955597, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.16599872708320618, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.14957301318645477, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.085291787981987, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.08096010982990265, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09562534838914871, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08799853920936584, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.08655087649822235, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0761948898434639, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.07201583683490753, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04887531325221062, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.042203985154628754, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.040992554277181625, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04070029407739639, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.024504249915480614, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02132607437670231, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.021195093169808388, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.019515588879585266, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01933632791042328, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013184227980673313, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01323773805052042, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01269837561994791, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009194784797728062, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.24739572405815125, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2319541871547699, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.22714000940322876, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.2049562782049179, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11635012924671173, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11077576130628586, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12996836006641388, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11954741179943085, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11801191419363022, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10425551235675812, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09858438372612, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06627246737480164, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.057245127856731415, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.0558202750980854, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05547722429037094, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.033227089792490005, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02873402275145054, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.028580524027347565, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02627928927540779, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.026067644357681274, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01772206462919712, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017367452383041382, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.017141960561275482, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011561319231987, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.20717747509479523, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.18597713112831116, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.17785042524337769, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.15832337737083435, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.0945335403084755, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.08639127016067505, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11162792146205902, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1014970988035202, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.09722760319709778, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08170164376497269, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.07733012735843658, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05680951848626137, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.04871612787246704, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04556809365749359, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.044802483171224594, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.028584206476807594, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.0241225678473711, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02383456937968731, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.021662242710590363, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.021186599507927895, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01551765389740467, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.015844687819480896, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.01440603006631136, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011467239819467068, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.07042835652828217, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.06083226203918457, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.055597834289073944, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.049166239798069, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.031740959733724594, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.02727874182164669, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.041114844381809235, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.03742334991693497, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.0333615243434906, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.026773104444146156, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.025570055469870567, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.020859815180301666, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.017887013033032417, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.015382998622953892, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.014732342213392258, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.010461277328431606, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.008152994327247143, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.007851941511034966, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.007192885037511587, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.006775805726647377, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.005554441828280687, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.005606255494058132, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.004603481385856867, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.003889688989147544, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.06485484540462494, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.05489803105592728, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.04915637522935867, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.043328650295734406, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.028811492025852203, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.023948557674884796, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.03842681646347046, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.035065554082393646, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.030545393005013466, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.02402050793170929, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.023102618753910065, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.019504031166434288, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.01672995276749134, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.013887407258152962, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.01314373966306448, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.009738704189658165, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.007228021044284105, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.006876581348478794, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.006281503941863775, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.005781551357358694, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.005035687237977982, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0049196090549230576, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.003951402381062508, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0031288887839764357, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.210117906332016, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.18566296994686127, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.17713682353496552, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.1562233716249466, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.09667028486728668, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.08718133717775345, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1127920001745224, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10371549427509308, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.09987290948629379, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08163536339998245, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.07640054076910019, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05725740268826485, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04952308535575867, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04631620645523071, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04549731686711311, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.02856474369764328, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.02354268729686737, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.023199625313282013, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.020424142479896545, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.0199049711227417, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.014660472981631756, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014173189178109169, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.01326958741992712, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008603344671428204, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.19950300455093384, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.17974413931369781, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.17248263955116272, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.15045416355133057, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09226533025503159, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08468002825975418, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10768210142850876, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09842358529567719, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.09478294104337692, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07854641228914261, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07350942492485046, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05547937750816345, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.047473639249801636, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.044613320380449295, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04393765330314636, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.027955392375588417, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.023639610037207603, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02332458458840847, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02095481939613819, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.020528879016637802, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015264883637428284, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.015495364554226398, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.014152685180306435, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011261295527219772, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.19294121861457825, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.18048521876335144, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1764351725578308, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.15893346071243286, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.09065522253513336, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.08602999150753021, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1014430820941925, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.093582384288311, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.09200141578912735, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.08103346079587936, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.07654949277639389, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05169571563601494, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04480094462633133, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.043479930609464645, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04316231235861778, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.025859065353870392, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.022419847548007965, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.022278184071183205, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.020476408302783966, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02027604542672634, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013657658360898495, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013634858652949333, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.013118267059326172, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009111697785556316, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.24979659914970398, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.23432886600494385, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2294768989086151, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.20703907310962677, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11755813658237457, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.11191287636756897, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.13088358938694, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.12087341398000717, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.1191767081618309, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.1054115891456604, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09965889155864716, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06667997688055038, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05781444534659386, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05632314831018448, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.055971670895814896, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03331584855914116, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.028865689411759377, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.028707467019557953, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.026380838826298714, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.026159366592764854, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.017432060092687607, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017261523753404617, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.016812190413475037, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011231131851673126, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.21367765963077545, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.19180864095687866, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.1828734427690506, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.1628098040819168, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09786171466112137, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.0891580730676651, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11625803261995316, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1057124212384224, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10065202414989471, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.0843324363231659, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08001495152711868, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05930786579847336, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.0506909005343914, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04711626470088959, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.046245213598012924, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.029904406517744064, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02478945255279541, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.024441109970211983, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.022162748500704765, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.0216086246073246, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01625251956284046, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016137605533003807, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.014985281974077225, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011410336941480637, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.06745301932096481, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.05787719413638115, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.051926009356975555, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.04592239856719971, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.03029404953122139, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.025397546589374542, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.040885455906391144, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.03699348121881485, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.031964175403118134, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.02548576146364212, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.024574575945734978, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.02073913998901844, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.01768382079899311, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.014712660573422909, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.013926499523222446, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.010396662168204784, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.007836207747459412, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.007462608627974987, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.006909835617989302, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.0064096651040017605, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.005481144413352013, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.005528191104531288, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.0043472102843225, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.003827928099781275, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.06260231137275696, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.05264753848314285, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.046199966222047806, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.04083540290594101, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.027704421430826187, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.02246048115193844, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.03838350996375084, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.03475130721926689, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.029510296881198883, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.023043878376483917, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.022332852706313133, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.019455747678875923, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.016588537022471428, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.013384879566729069, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.012519200332462788, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.009705383330583572, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.0070271617732942104, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.006609690375626087, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0061135198920965195, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.005546699743717909, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.005051196552813053, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.004934592638164759, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.003838289063423872, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0032006476540118456, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.1860847771167755, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.16443008184432983, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1557808816432953, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.1372692883014679, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.0854506641626358, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.07659222930669785, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10185369104146957, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09344063699245453, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.0884724035859108, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07231936603784561, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.06793995946645737, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.051682427525520325, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.044597722589969635, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04097934067249298, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04010085016489029, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.025790385901927948, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.020938949659466743, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.020510785281658173, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.018242066726088524, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.01763485185801983, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.013262578286230564, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.012895841151475906, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.011746593751013279, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.007950966246426105, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.18385043740272522, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1617710143327713, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.15340584516525269, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.13293308019638062, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.0848393440246582, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.07547733932733536, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10139577090740204, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09183190017938614, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.08776063472032547, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07066021859645844, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.06552211940288544, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05216550827026367, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04439570754766464, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.041200704872608185, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04040776193141937, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.026209672912955284, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.02200903557240963, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.021639518439769745, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.019303616136312485, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.018797431141138077, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.014180639758706093, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.014779385179281235, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.012884317897260189, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0109304403886199, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.17134438455104828, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1605241447687149, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15687401592731476, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1416788548231125, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.08050661534070969, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07644160836935043, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0903552696108818, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08324466645717621, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.08168134838342667, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0721639096736908, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06832308322191238, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04611344262957573, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.039876412600278854, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.038642510771751404, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03835344314575195, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.023088529706001282, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02001095749437809, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.019878758117556572, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01833276078104973, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.018148353323340416, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012294663116335869, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012299594469368458, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011792503297328949, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.00836892519146204, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.23740644752979279, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22298786044120789, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2182627171278, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.19738376140594482, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11182139068841934, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10644623637199402, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12489058822393417, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11514795571565628, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11335364729166031, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10041148215532303, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09507352858781815, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06369015574455261, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05509794130921364, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.053596362471580505, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05324261635541916, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.031861551105976105, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02753533236682415, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.027373356744647026, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.025207089260220528, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.024985503405332565, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016784491017460823, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016574189066886902, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.016173038631677628, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010893713682889938, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.2122400999069214, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.19167816638946533, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.1838274896144867, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.16407713294029236, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09756492078304291, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.08966390788555145, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11518903821706772, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10433119535446167, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10014721006155014, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08469261974096298, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08067811280488968, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.058668989688158035, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05009467527270317, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04705344885587692, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04629999399185181, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.029494155198335648, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02487543411552906, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.024587390944361687, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.022393763065338135, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02192530408501625, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.015990478917956352, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01627272181212902, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.01491608191281557, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011752321384847164, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.07368312776088715, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.06524388492107391, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.058716706931591034, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.05196855217218399, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.033551618456840515, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.02849716693162918, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.045955657958984375, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.04137164726853371, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.034900110214948654, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.028888484463095665, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.027947457507252693, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.02341492660343647, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.019909929484128952, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.016311924904584885, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.015355139039456844, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01175627950578928, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.008757604286074638, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.008274498395621777, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.007873104885220528, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.00727404560893774, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.006226843222975731, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.006295682396739721, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.0048815603367984295, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.004415824078023434, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.06395983695983887, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.055887967348098755, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.049080636352300644, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.0432746559381485, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.028728609904646873, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.023607881739735603, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.04060055688023567, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.036894578486680984, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.03005986101925373, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.024589059874415398, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.023868218064308167, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.020601127296686172, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.017691530287265778, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.01390902977436781, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.012875471264123917, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.010346534661948681, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.007332100532948971, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.0067993588745594025, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.00652318587526679, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.005859715398401022, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.005364003125578165, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.00526396231725812, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.003971028607338667, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.003418054897338152, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.19873937964439392, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.17860601842403412, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1709216684103012, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.15058216452598572, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.09201360493898392, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.08378448337316513, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.108126200735569, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09868378192186356, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.09459316730499268, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07865295559167862, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.07372245192527771, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0550462007522583, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.047211624681949615, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04410484433174133, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04337243735790253, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.02745058573782444, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.02252965420484543, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.022188007831573486, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01978812739253044, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.019288834184408188, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.014162842184305191, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.013715428300201893, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.012799150310456753, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008557401597499847, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.1930551826953888, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.17261065542697906, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.16433411836624146, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.14058873057365417, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.08974676579236984, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08119257539510727, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10804883390665054, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09700122475624084, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.09228172898292542, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07498206198215485, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.06967420876026154, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05584794655442238, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04720936343073845, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.043953441083431244, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04317564144730568, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.028179831802845, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.023969464004039764, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.023604486137628555, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.021200144663453102, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.020713847130537033, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015690738335251808, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016596633940935135, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.014503179118037224, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012859206646680832, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.16811171174049377, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15773963928222656, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1543261557817459, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1395176202058792, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07918117940425873, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07532058656215668, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08850614726543427, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.0815688893198967, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.08031411468982697, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07104586809873581, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06727510690689087, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04523969814181328, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03917139768600464, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.038089193403720856, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.037838466465473175, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.022647799924016, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.01985746994614601, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.019745472818613052, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01824198104441166, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.018086593598127365, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01210565585643053, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012388458475470543, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011665618978440762, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.00868840143084526, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2379271686077118, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22360201179981232, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.21908625960350037, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1982893943786621, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11219623684883118, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.1069764569401741, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12532104551792145, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11521067470312119, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.1137063279747963, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10082627087831497, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09561602771282196, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06410554051399231, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05525447055697441, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05390479415655136, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.053588565438985825, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03212656080722809, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02792953886091709, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.027787335216999054, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.025639859959483147, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.0254399124532938, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.017240425571799278, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01713263802230358, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.016681861132383347, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011735258623957634, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.20715844631195068, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.18632441759109497, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.1781129688024521, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.1592809110879898, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09512431919574738, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.08687739074230194, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11252188682556152, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10223735123872757, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.09774754196405411, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08224885165691376, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.07846838235855103, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05739583447575569, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.04904588684439659, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.045834798365831375, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04505566507577896, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.02884741872549057, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.024167805910110474, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.023865079507231712, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.021672764793038368, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.021184025332331657, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.015606887638568878, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.015757892280817032, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.014484452083706856, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011265452019870281, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.07274093478918076, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.06398167461156845, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.05901987850666046, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.051767498254776, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.03307585045695305, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.02882423810660839, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.042155150324106216, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.03850950300693512, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.034373149275779724, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.027984609827399254, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.02654922381043434, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.021357310935854912, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.018378976732492447, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.015968091785907745, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.01535341702401638, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0106996800750494, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.008393123745918274, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.008087512105703354, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.007395023014396429, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.0069939144887030125, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.005628315731883049, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.005652714520692825, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.004719635471701622, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0038322079926729202, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.0639771893620491, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.05507109314203262, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.049992870539426804, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.04363088309764862, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.02863454632461071, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.02428140491247177, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.037456851452589035, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.03412633389234543, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.03006300888955593, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.02387368306517601, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.02277533710002899, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.019023412838578224, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.016296332702040672, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.013804573565721512, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.013153597712516785, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.009524213150143623, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.007207225542515516, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.006885834038257599, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.006263815797865391, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.005834262818098068, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.004957451019436121, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.004874145146459341, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.003991379402577877, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0031904929783195257, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.18610163033008575, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.16495119035243988, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.15707367658615112, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.1373746544122696, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.08540261536836624, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.07700732350349426, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10137973725795746, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09212584793567657, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.08823493123054504, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0720210000872612, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.06732641905546188, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.051638972014188766, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04409341514110565, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.040970250964164734, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04020749032497406, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.025788258761167526, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.021063512191176414, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.020721333101391792, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.018371988087892532, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.01785518042743206, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.013339348137378693, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.013092066161334515, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.011921821162104607, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008446093648672104, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.19815996289253235, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.17976465821266174, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.17265908420085907, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.15144865214824677, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09206929802894592, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08476109057664871, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10829168558120728, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09837426990270615, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.09448032081127167, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07915785908699036, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07407382875680923, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.055696506053209305, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.047385044395923615, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04449989274144173, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.043799012899398804, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.027870014309883118, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.023410774767398834, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.023099388927221298, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02085740678012371, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.020420558750629425, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.014839133247733116, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01516404002904892, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.013761857524514198, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.010776226408779621, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.1641969382762909, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1541924625635147, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1508277803659439, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.13639801740646362, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07734609395265579, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07354967296123505, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0867111086845398, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07977785170078278, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07839500904083252, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06943081319332123, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06580647826194763, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.044285472482442856, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03826013207435608, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03715372458100319, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03690529614686966, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.022162413224577904, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.01930447854101658, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.019186099991202354, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.017721962183713913, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01756362058222294, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011799439787864685, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.011941410601139069, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011356739327311516, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008250841870903969, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.23443543910980225, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22038348019123077, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2158605009317398, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.19512449204921722, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11059247702360153, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10532958060503006, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12352047860622406, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11369466781616211, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11206325143575668, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09928756207227707, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09401359409093857, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06303215026855469, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05444209277629852, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05304820090532303, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05271576717495918, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03151734918355942, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02730935625731945, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.027159176766872406, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.025016728788614273, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.024803124368190765, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016588088124990463, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016514115035533905, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01600096933543682, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010991213843226433, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.20975209772586823, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.1893489509820938, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.18166963756084442, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.16181400418281555, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09645146131515503, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.08860622346401215, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11309275776147842, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10285982489585876, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.09891370683908463, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.0833834558725357, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.07907011359930038, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05763966217637062, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.049394410103559494, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04650724306702614, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04580609127879143, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.029060987755656242, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.024588672444224358, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.024320611730217934, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02207118831574917, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.021625563502311707, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.015895787626504898, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016055436804890633, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.014910649508237839, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011599124409258366, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.09305763989686966, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.0821881890296936, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.07617272436618805, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.06689299643039703, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.04238395020365715, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.03717793524265289, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.053913459181785583, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.04892453923821449, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.0439705029129982, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.03598710522055626, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.03419721871614456, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.02741214819252491, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.023454464972019196, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.02053670957684517, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.01980116404592991, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.013755647465586662, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.01094516646116972, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.010588771663606167, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.009725046344101429, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.009262088686227798, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.007348077837377787, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.0075245448388159275, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.006235032342374325, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005373104941099882, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.07992376387119293, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.06968246400356293, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06367132067680359, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.05577866733074188, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.03601401671767235, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.030921999365091324, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.0467686653137207, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04248376935720444, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.03763854503631592, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.030324330553412437, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.02893686108291149, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0237408559769392, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.020323464646935463, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.017345143482089043, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.01658143661916256, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.011883052065968513, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.009052073583006859, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.008672080934047699, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.007931627333164215, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.007426897995173931, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0061841062270104885, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0060850707814097404, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005040808580815792, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.003980077337473631, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.19288596510887146, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1741875559091568, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.16748999059200287, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.14666330814361572, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.08944670855998993, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.08207922428846359, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10330270975828171, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09480142593383789, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.09153472632169724, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07622844725847244, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.07085622847080231, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05242748185992241, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.045224376022815704, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.042811013758182526, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04219438508152962, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.0261659137904644, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.021744387224316597, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.021466847509145737, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01901194453239441, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.018602702766656876, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.013431381434202194, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.012979782186448574, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.012362638488411903, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00790471863001585, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.19593441486358643, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1771821230649948, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1699334681034088, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.14767074584960938, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09118302166461945, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08353284746408463, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10686176270246506, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09738080203533173, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.09348741173744202, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07732392847537994, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.0723114088177681, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05483820289373398, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04676835983991623, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.043944619596004486, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04323635995388031, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.02742127887904644, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.02287098579108715, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02255258709192276, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.020081380382180214, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.019632499665021896, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.014430616982281208, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.014485248364508152, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.013324340805411339, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.009910336695611477, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.16161136329174042, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15129373967647552, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14781439304351807, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.13348305225372314, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07603836059570312, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07216114550828934, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08537425100803375, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07858878374099731, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07718685269355774, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06803050637245178, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06451736390590668, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04368431121110916, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.037744976580142975, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.0366012379527092, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.036328963935375214, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.021887313574552536, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.019136909395456314, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.019020676612854004, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.017552515491843224, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.017388207837939262, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011733565479516983, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012028388679027557, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011270849965512753, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008520961739122868, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.22862328588962555, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2141641080379486, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.20957417786121368, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1892169713973999, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.10760441422462463, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10224279761314392, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12062136828899384, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11083917319774628, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.10917235165834427, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09629752486944199, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09126938134431839, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06165294721722603, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.0531284473836422, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05168283358216286, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05133678764104843, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03086879849433899, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.026775240898132324, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.026619331911206245, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.024503616616129875, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.024282827973365784, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016483701765537262, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.0164521262049675, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.015873461961746216, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011244796216487885, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.2156054824590683, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.194500133395195, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.18643790483474731, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.16549144685268402, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09912559390068054, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09096033871173859, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11675665527582169, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1060246080160141, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10172800719738007, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08547679334878922, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08082538098096848, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05927864834666252, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05078822001814842, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04768265783786774, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04694240167737007, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.029768256470561028, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.025011733174324036, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.0247257798910141, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.022348478436470032, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.021871989592909813, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01608959026634693, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01609836332499981, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.015019440092146397, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011331579647958279, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.08082568645477295, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.07187711447477341, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.06700631976127625, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.05833404138684273, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.037015654146671295, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.03274067863821983, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.04620716720819473, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.04217899218201637, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.0381951741874218, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.03130137547850609, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.02947879582643509, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.02344883605837822, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02013690583407879, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.017850559204816818, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.01726834662258625, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.011741907335817814, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.009343976154923439, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.00905139371752739, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.008211922831833363, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.007831839844584465, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.00617196224629879, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.0061808698810637, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.005287608597427607, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.004173995926976204, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.07359808683395386, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.06469830870628357, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06002126261591911, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.05201971158385277, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.03340175375342369, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.02925211936235428, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.04216946288943291, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.038231395184993744, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.03460416570305824, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.027961036190390587, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.026370089501142502, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.02141469717025757, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.01827141083776951, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.01604226790368557, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.015471860766410828, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.010674209333956242, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.008301502093672752, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.008020490407943726, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.007223955821245909, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.006844713818281889, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.005527160596102476, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.005410305690020323, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0046456921845674515, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0034807701595127583, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.1802336573600769, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.16155728697776794, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1547575742006302, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.13444827497005463, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.08346226066350937, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.07602858543395996, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.09759337455034256, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.08883123099803925, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.08572720736265182, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0703098401427269, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.06537793576717377, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.04969196021556854, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04245787113904953, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.03999129682779312, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.0393984355032444, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.024804439395666122, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.0204655509442091, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02018914744257927, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.017778057605028152, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.017375405877828598, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.012805959209799767, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.012449988164007664, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.011662641540169716, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00788106769323349, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.19413477182388306, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1753351241350174, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.16758838295936584, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.1469835788011551, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.08980388939380646, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08211816102266312, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10689923912286758, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09712403267621994, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.09229182451963425, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07711254805326462, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07236526906490326, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05501924827694893, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.046784352511167526, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04349277541041374, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.042684439569711685, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0275920070707798, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.023045426234602928, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.022677496075630188, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.020576205104589462, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.020069386810064316, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.014910003170371056, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.015225455164909363, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.013676553964614868, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011014528572559357, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.1582847684621811, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14727845788002014, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14350764453411102, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.12918493151664734, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07422542572021484, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07007070630788803, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08396020531654358, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07708990573883057, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.0755041092634201, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06605184078216553, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.062476374208927155, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.042945604771375656, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03702958673238754, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.035732172429561615, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.035428985953330994, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.021535880863666534, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.018713608384132385, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.01857743225991726, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.017102254554629326, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.016909001395106316, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011575983837246895, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.011835893616080284, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011050505563616753, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008403673768043518, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2207336276769638, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.20559629797935486, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.20063050091266632, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.18053314089775085, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.10353055596351624, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.09786707907915115, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11654064804315567, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10717884451150894, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.10525188595056534, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09209538996219635, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.0870027020573616, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.059489961713552475, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05130898207426071, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.049670226871967316, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04927607253193855, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.029743224382400513, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02563612163066864, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.025459475815296173, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02332605980336666, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02307618223130703, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01565857231616974, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.015664370730519295, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01496453769505024, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01051484514027834, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.22194936871528625, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.20053349435329437, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.19256041944026947, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.17063400149345398, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10239055752754211, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09421849995851517, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12015771120786667, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10899727046489716, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10505366325378418, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08839642256498337, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.0834844559431076, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.061411451548337936, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05237302929162979, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04936830326914787, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04863956943154335, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.030885033309459686, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02603851445019245, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.025758882984519005, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.023315155878663063, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.022854356095194817, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.016773397102952003, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016903605312108994, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.015714501962065697, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.01213167142122984, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.09374567866325378, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.08305899053812027, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.07747684419155121, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.06729687005281448, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.043036267161369324, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.03806021809577942, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.053488317877054214, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.048746585845947266, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.04441966861486435, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.03620411455631256, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.03406485915184021, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.027196642011404037, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02336248941719532, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.020820219069719315, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02018858678638935, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.013637939468026161, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.011023127473890781, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.010705305263400078, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.0096979308873415, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.00928707979619503, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.007243681233376265, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.007421766873449087, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.006267672870308161, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005229939240962267, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.07837123423814774, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.06895216554403305, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06344479322433472, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.054908327758312225, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.03559600189328194, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.030917692929506302, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.04553208127617836, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.041560668498277664, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.0368574783205986, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.02983267977833748, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.028186235576868057, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.023189814761281013, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.01984536647796631, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.017161257565021515, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.016467584297060966, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.011594805866479874, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.00898022297769785, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.008630492724478245, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.007841335609555244, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.00739480834454298, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.006066075526177883, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006016253028064966, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005060456693172455, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004013785161077976, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.19302532076835632, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1710498183965683, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1629783809185028, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.14065340161323547, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.08880346268415451, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.08009723573923111, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10540047287940979, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09507858753204346, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.09145815670490265, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07391148805618286, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.06842108815908432, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.053668078035116196, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04541914910078049, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04250100627541542, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04180701822042465, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.02682369388639927, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.021782279014587402, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.021458422765135765, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01876583695411682, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.018275205045938492, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.013870100490748882, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.013342726975679398, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.012413639575242996, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008451285772025585, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.19954679906368256, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1795382797718048, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.16557663679122925, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.14528274536132812, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09258060157299042, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0812278538942337, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12210730463266373, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10972794890403748, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.09602922946214676, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.080019511282444, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07648655027151108, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06311003118753433, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.053102049976587296, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04518766328692436, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.043159812688827515, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.031624455004930496, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.02433881349861622, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.023352086544036865, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.021939342841506004, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.02068064734339714, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.016943251714110374, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.017255499958992004, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.014108543284237385, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.01246720366179943, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.16296377778053284, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15116609632968903, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14706946909427643, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.13230521976947784, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07651887834072113, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07200277596712112, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08706214278936386, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07977559417486191, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.0778622254729271, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06786734610795975, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06426136195659637, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04464457556605339, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03841034322977066, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03695528954267502, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03661109134554863, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.022419381886720657, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.01954667456448078, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.019389884546399117, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01786755956709385, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01765047013759613, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012199409306049347, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012638337910175323, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01161093357950449, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009244242683053017, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.22239433228969574, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.20655812323093414, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.20130924880504608, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1810242235660553, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.10424038767814636, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.09827698022127151, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11776786297559738, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10816788673400879, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.10599800199270248, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09245443344116211, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.08731529116630554, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05998179316520691, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05176745727658272, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04998299852013588, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.049562714993953705, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03000614605844021, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.025753552094101906, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.025562336668372154, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.023367054760456085, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02309858240187168, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.015748368576169014, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.015679705888032913, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.014986014924943447, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010415536351501942, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.22773458063602448, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.20587846636772156, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.1978023499250412, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.17519952356815338, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10530412197113037, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09695745259523392, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12337180227041245, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11195117235183716, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10796899348497391, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09088875353336334, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08577718585729599, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06317029148340225, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05383114889264107, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.05080699175596237, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.050076648592948914, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03179621323943138, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02681727707386017, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.026543566957116127, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02402574196457863, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02355680800974369, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017194917425513268, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017437009140849113, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.016093695536255836, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012547777965664864, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.09789632260799408, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.08623262494802475, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.07951655983924866, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.06911395490169525, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.04481830820441246, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.03901994600892067, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.05817926675081253, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.052054110914468765, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.046450480818748474, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.037694960832595825, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.035968150943517685, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.029801657423377037, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.025062980130314827, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.021782387048006058, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.020940935239195824, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.015009371563792229, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.011681635864078999, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.011266957968473434, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.010329734534025192, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.0098000168800354, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008087142370641232, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008142317645251751, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.006764658261090517, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005878992844372988, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.08470267802476883, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07405143231153488, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06786585599184036, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.05881382152438164, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.03835950791835785, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.033066313713788986, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.050056569278240204, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.044961076229810715, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.03988298028707504, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03210264444351196, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03043423593044281, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.025496844202280045, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.021568505093455315, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.01856292597949505, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.01779482513666153, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.012797600589692593, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.009807308204472065, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.009426459670066833, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.00858340784907341, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.008087255991995335, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.006747814826667309, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006708301603794098, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005577879026532173, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004616273567080498, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.1879449486732483, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.16815149784088135, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1604091376066208, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.13951298594474792, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.08715509623289108, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.07898980379104614, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10353218019008636, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.0936191976070404, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.08964745700359344, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0733264610171318, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.068442702293396, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0527946874499321, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04473506286740303, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04181131720542908, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04107756167650223, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.026386616751551628, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.021373707801103592, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.021021271124482155, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.018539385870099068, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.01806076429784298, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.013633720576763153, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.01306041982024908, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.012271106243133545, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008211624808609486, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.21252651512622833, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1889447122812271, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.18011707067489624, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.1587470918893814, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09833399206399918, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08882390707731247, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11714281886816025, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10605520009994507, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10159486532211304, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08343421667814255, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07880555093288422, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.060465745627880096, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05128156393766403, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04776095226407051, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.046917129307985306, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.030415697023272514, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.02538730762898922, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02498779259622097, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.022471856325864792, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.021926352754235268, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.016431130468845367, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016816994175314903, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.015040921978652477, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012257596477866173, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.16700048744678497, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15457122027873993, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15030550956726074, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.13504256308078766, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07846656441688538, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07373533397912979, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08934933692216873, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08168081939220428, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.0799248069524765, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0693979486823082, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06584690511226654, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.045832835137844086, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03943296894431114, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03799095004796982, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03765781968832016, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02308918721973896, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02025127224624157, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02010253816843033, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.018501726910471916, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01829768344759941, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01275269128382206, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013285242952406406, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01219133473932743, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009939693845808506, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.23141488432884216, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21454720199108124, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.20901305973529816, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.18750067055225372, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.10867713391780853, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10235540568828583, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12244876474142075, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11254958063364029, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11056029796600342, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09604187309741974, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09062711149454117, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06261090934276581, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05395635962486267, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05219637602567673, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05178465694189072, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.031300656497478485, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.027048660442233086, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.026867980137467384, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02451753243803978, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.024250322952866554, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016569102182984352, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01668642833828926, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.0158405601978302, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01139636430889368, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.22992275655269623, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.20723186433315277, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.19769546389579773, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.17509835958480835, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10603944957256317, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09685268998146057, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12626118957996368, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11459313333034515, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10891563445329666, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09128990024328232, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08618851751089096, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.0643414631485939, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.055028095841407776, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.05121690034866333, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.05028599873185158, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03244553878903389, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.027133731171488762, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.026745738461613655, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.024288436397910118, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.023687513545155525, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01763574592769146, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017881957814097404, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.01629270240664482, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012892890721559525, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.10058078914880753, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.08994234353303909, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08477477729320526, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.07385796308517456, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.046420544385910034, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04156380146741867, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.05651570484042168, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05149103328585625, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.047743286937475204, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.03922966495156288, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.03685031831264496, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.02872544527053833, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.024620382115244865, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.022325212135910988, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02176504209637642, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.014377288520336151, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.011633533984422684, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.011351489461958408, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01021020207554102, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.009844361804425716, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.007540787570178509, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.00752853462472558, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.006614684127271175, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0050744242034852505, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.08358532935380936, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.0746474340558052, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.0696462094783783, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06069726496934891, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.038319509476423264, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.033954910933971405, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.04787331074476242, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.043586693704128265, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.03939766436815262, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03245816379785538, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.030592678114771843, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.024288184940814972, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02084559202194214, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.018438024446368217, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.017854105681180954, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.012135222554206848, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.009594833478331566, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.009286490269005299, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.008422231301665306, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.008025215938687325, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.006336874794214964, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006269299890846014, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005428927019238472, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004122551996260881, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.2066575288772583, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.18493428826332092, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.17716838419437408, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.15362349152565002, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.09552241116762161, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.08691083639860153, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11211443692445755, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10143159329891205, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.09809347242116928, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08027251809835434, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.07458756119012833, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.057110391557216644, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04839768633246422, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04569753259420395, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04504263028502464, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.028534386307001114, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.023271357640624046, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02296106331050396, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.020139189437031746, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.01968487724661827, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.014634380117058754, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.013959145173430443, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.013231181539595127, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008581357076764107, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.22621560096740723, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.20602287352085114, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.19833171367645264, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.17569909989833832, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.1050976812839508, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0969984233379364, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12280769646167755, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11197548359632492, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10761086642742157, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09093069285154343, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.08631585538387299, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06320501118898392, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05403285101056099, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.050897616893053055, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.05013788491487503, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.031726595014333725, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.026964260265231133, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.026625506579875946, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02422073297202587, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.023734310641884804, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.017136700451374054, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.017721548676490784, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.015987088903784752, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012903274036943913, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.17108432948589325, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15852610766887665, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15408530831336975, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.13842305541038513, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.08046427369117737, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07561147958040237, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09161596745252609, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08397041261196136, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.08198399841785431, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07121738791465759, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06739652156829834, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04700929671525955, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04045258089900017, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.038868144154548645, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03849725052714348, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02356923185288906, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.020542392507195473, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.020373564213514328, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.018743371590971947, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.018512388691306114, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012739429250359535, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013278264552354813, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01209731213748455, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009688137099146843, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2365759164094925, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21960705518722534, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2139822393655777, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1920749694108963, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11119509488344193, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.1047758013010025, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12565146386623383, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11525914818048477, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11311860382556915, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09838807582855225, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09281273931264877, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06423730403184891, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05524970963597298, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05340493470430374, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.0529663972556591, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.032153625041246414, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.027606455609202385, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.027403151616454124, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02501787804067135, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02474062331020832, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.017074638977646828, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01693008653819561, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01629365049302578, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011418762616813183, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.24110722541809082, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.21616211533546448, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.2067447304725647, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.18238115310668945, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.1112435981631279, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.10151531547307968, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13111376762390137, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11891301721334457, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.1144002303481102, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.0949595645070076, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08951087296009064, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06708851456642151, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05713002756237984, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.05359915271401405, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.052746035158634186, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.033768944442272186, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.028170181438326836, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.027852896600961685, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.024994170293211937, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.024450700730085373, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01824217662215233, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01820322684943676, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.017002275213599205, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012889482080936432, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.10241835564374924, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09173483401536942, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08578169345855713, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.07511857151985168, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.04723288491368294, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.042031340301036835, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.059226155281066895, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05368193984031677, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.04861046373844147, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.040256790816783905, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.03813687339425087, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.030138129368424416, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.025699162855744362, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.02283138409256935, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02210020273923874, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.015090877190232277, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.01203228160738945, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.011662938632071018, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.010668247006833553, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.010199466720223427, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.007973968051373959, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008052505552768707, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.006837244611233473, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0055862730368971825, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.08284705132246017, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07450702786445618, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06809490919113159, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.05983541160821915, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.03798317909240723, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.033068589866161346, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05002044513821602, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.045467790216207504, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.03908313065767288, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.032679978758096695, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03118291310966015, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.02533496916294098, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.021720146760344505, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.01831892691552639, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.01744508370757103, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.012675907462835312, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.009596253745257854, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.009134247899055481, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.008553521707654, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.007988078519701958, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0066289170645177364, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006536138243973255, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005399767775088549, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004305317997932434, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.21660995483398438, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.19450075924396515, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.18613281846046448, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.16235294938087463, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10044310986995697, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09138307720422745, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11993489414453506, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10739727318286896, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.10301963984966278, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08494794368743896, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.07930237799882889, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0614265501499176, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05137837305665016, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.048184141516685486, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04741770029067993, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.030681904405355453, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.024753544479608536, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.024375690147280693, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.021589310839772224, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.021073568612337112, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.015853995457291603, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015209974721074104, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.014137586578726768, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009767596609890461, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.2471231073141098, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.2123441845178604, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.2002299576997757, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.1748332977294922, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.11359408497810364, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.10045517981052399, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.13647395372390747, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12340078502893448, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.1186470240354538, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09347199648618698, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.08767347782850266, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.07026071101427078, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05967329069972038, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.05512165278196335, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.054010190069675446, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03526464104652405, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.029315246269106865, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.028913414105772972, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.025441372767090797, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.024728555232286453, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01894213631749153, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.019521895796060562, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.01724860817193985, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.014196354895830154, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.16012108325958252, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14862589538097382, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14426195621490479, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.12982812523841858, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07544432580471039, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07085119932889938, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08645062893629074, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07919887453317642, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.0768398568034172, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0670287236571312, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06354281306266785, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.044384658336639404, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03824295103549957, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.036537934094667435, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.036133576184511185, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.022278979420661926, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.01944872736930847, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.019268987700343132, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01782573200762272, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.017579443752765656, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012098429724574089, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012781066820025444, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011428900063037872, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009496662765741348, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2329186201095581, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2168794423341751, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2113320231437683, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.19012142717838287, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.10974781215190887, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10357102006673813, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12424710392951965, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11406312137842178, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11160588264465332, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09756678342819214, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09217503666877747, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06350281834602356, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05472505837678909, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.052764974534511566, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.0523107536137104, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03178577125072479, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.027405761182308197, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02719210647046566, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.024965433403849602, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.024672508239746094, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016892356798052788, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017020270228385925, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01608242280781269, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01168624684214592, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.2259468138217926, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.20215070247650146, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.19223737716674805, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.16999436914920807, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10395947843790054, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09434140473604202, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12496285140514374, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11290643364191055, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10703036189079285, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08899981528520584, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08437637239694595, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06402325630187988, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.054414767771959305, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.050511423498392105, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04946623370051384, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.0326140820980072, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.0271421130746603, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.026698149740695953, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.024329334497451782, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02365604229271412, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.018167534843087196, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.018434135243296623, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.01661813072860241, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.013729767873883247, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.11149106174707413, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10076627135276794, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09464345872402191, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08345458656549454, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.051612913608551025, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0462992861866951, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06453662365674973, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05828530713915825, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05299297347664833, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04450225830078125, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04224744811654091, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03294824808835983, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.027990832924842834, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.025004498660564423, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.024253498762845993, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.016618160530924797, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.013300411403179169, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.012913851998746395, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01193242333829403, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.011453807353973389, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008861066773533821, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009006771259009838, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007654951419681311, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006432000081986189, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.08473136276006699, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07643866539001465, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06964679807424545, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06148694083094597, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.03874098137021065, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03359059989452362, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.052140671759843826, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.0469571053981781, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.03989173471927643, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03369864821434021, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.032441869378089905, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.026492921635508537, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02250761166214943, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.018726464360952377, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.017713649198412895, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.013288182206451893, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.009828743524849415, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.009311280213296413, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.008838080801069736, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.008212238550186157, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.006971581373363733, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006789230741560459, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005570152774453163, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004502697382122278, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.2295965552330017, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.20999519526958466, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.20222413539886475, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.17978833615779877, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10757017880678177, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.0994846522808075, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12647458910942078, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11435925960540771, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.11004791408777237, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0936121791601181, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.08828047662973404, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0645735040307045, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05474868789315224, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.05164271965622902, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.0508633591234684, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03228744864463806, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.026394477114081383, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02603098191320896, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.023476125672459602, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.022987350821495056, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.016631199046969414, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015967370942234993, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.015172022394835949, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010006846860051155, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.22030878067016602, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.19845248758792877, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.19044733047485352, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.1638951152563095, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.10239215940237045, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.09351810067892075, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11977759748697281, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.108641617000103, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10477049648761749, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08620263636112213, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07848973572254181, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06159655749797821, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.052220869809389114, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04934767633676529, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.048664722591638565, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03084838017821312, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.025780905038118362, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.025463202968239784, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.022564038634300232, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.022105377167463303, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.016422532498836517, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01641864888370037, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.015296503901481628, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011432652361690998, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.15806035697460175, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14725667238235474, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14321650564670563, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.12920941412448883, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07451637834310532, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07025228440761566, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08495192974805832, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07797049731016159, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07581926882266998, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06652151048183441, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06308340281248093, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04357563704252243, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03766894340515137, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03611328452825546, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03573125600814819, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02191423438489437, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.019252656027674675, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.019086025655269623, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.017708832398056984, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.017485802993178368, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011982982978224754, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012666789814829826, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011381003074347973, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009456133469939232, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2371123731136322, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2218000292778015, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.21658393740653992, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.19538117945194244, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11168468743562698, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10583724081516266, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12575538456439972, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11570324003696442, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11340554058551788, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09983432292938232, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09443381428718567, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06413595378398895, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.0553918331861496, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05358316749334335, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.053153086453676224, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03204731270670891, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.027547018602490425, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02734651044011116, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.025116460397839546, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.0248432494699955, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01679346337914467, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01664924807846546, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01604764722287655, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01093271467834711, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.22789204120635986, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.20338846743106842, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.1936316341161728, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.17172396183013916, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10459312051534653, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09477842599153519, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12455662339925766, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11313959956169128, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10781509429216385, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08942752331495285, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.0846603661775589, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06352321803569794, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.054317452013492584, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.05044841393828392, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.049500755965709686, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03202909976243973, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.026616854593157768, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.026239803060889244, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.023666763678193092, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.023054491728544235, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01747220754623413, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017427710816264153, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.01613624393939972, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012416037730872631, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.11251742392778397, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10241495072841644, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.0970027893781662, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08580730855464935, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05211988463997841, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04731042683124542, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06346961855888367, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.057928379625082016, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05332787334918976, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04521632567048073, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.042796894907951355, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.032257843762636185, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.027692176401615143, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.025085143744945526, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.024436302483081818, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.016170568764209747, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.013075734488666058, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.01273524109274149, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011704420670866966, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.011285283602774143, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008506720885634422, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008473527617752552, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007484575733542442, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.00570459570735693, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.08859581500291824, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08075215667486191, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.0744008794426918, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06591163575649261, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.0406159833073616, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03586973249912262, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05283074826002121, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04835581034421921, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04163482412695885, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.035578515380620956, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03403132036328316, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.026643920689821243, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.023029334843158722, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.01954442448914051, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.018657006323337555, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.013343213126063347, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.010150585323572159, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.009659276343882084, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.00914075504988432, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.008551008999347687, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0069532496854662895, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0067847673781216145, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005703108850866556, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004306425340473652, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.23588503897190094, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.21731504797935486, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.21030932664871216, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.18793873488903046, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.1104978695511818, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.10302083194255829, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12904831767082214, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11641287058591843, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.11278766393661499, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09702865034341812, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.09168949723243713, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06596195697784424, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.0556892566382885, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.052979569882154465, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.05231867730617523, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.032939255237579346, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.026960313320159912, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02665487304329872, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02414514869451523, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.023733362555503845, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01690679043531418, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.016081225126981735, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.015507807955145836, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009919451549649239, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.23047858476638794, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.20754507184028625, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1997402459383011, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.17685341835021973, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.10647282004356384, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.09762806445360184, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12343332171440125, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11264709383249283, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10942184180021286, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09129495173692703, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.08624289184808731, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.0635557770729065, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05446691811084747, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.05162039399147034, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.050931841135025024, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.032160684466362, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.027536479756236076, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02724253386259079, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.024603134021162987, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.024185195565223694, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01770622469484806, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.018220223486423492, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.01660274714231491, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.01354956440627575, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.14921486377716064, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.13908667862415314, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.13509449362754822, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1219821497797966, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.0702517032623291, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.0661851242184639, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08050234615802765, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07385393977165222, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07147515565156937, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06281455606222153, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.059637222439050674, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.041255977004766464, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.035623375326395035, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03399363160133362, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03359835594892502, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02070719376206398, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.018037550151348114, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.01786090061068535, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.016595609486103058, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.016357949003577232, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011215098202228546, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.011787009425461292, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.010586723685264587, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008668264374136925, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2304653823375702, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21601179242134094, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.21082395315170288, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.19045200943946838, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.10856305062770844, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.1029263362288475, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1224774569272995, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11272876709699631, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11025681346654892, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09725894778966904, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09208337962627411, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06246592849493027, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.053954336792230606, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05208456516265869, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.0516422800719738, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03121616318821907, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.026783892884850502, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.026570552960038185, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.024472393095493317, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.024188373237848282, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016363870352506638, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01622305065393448, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.015599383041262627, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010655590333044529, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.22137141227722168, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.19615602493286133, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.1857890784740448, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.1645200252532959, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10125523060560226, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09092222154140472, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12164697051048279, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11051943898200989, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10456230491399765, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08585201948881149, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08149811625480652, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.062072187662124634, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.053096771240234375, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04886474460363388, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.047801852226257324, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03136716037988663, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.025816943496465683, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02539731375873089, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.022837568074464798, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02215651236474514, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01709451898932457, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017034996300935745, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.015600196085870266, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.01213280949741602, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.09451375901699066, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.0860741138458252, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08058028668165207, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.07182072848081589, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.043584998697042465, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.03912578895688057, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.05496824160218239, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05010327696800232, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.044799692928791046, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.038226235657930374, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.03651423007249832, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.027972383424639702, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02398543246090412, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.02105804905295372, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02031412534415722, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.014000673778355122, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.01107819750905037, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.01069879811257124, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.010028695687651634, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.009558655321598053, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.007352572865784168, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.007438136730343103, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.0062459795735776424, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005094896070659161, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.08064576238393784, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07296339422464371, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06675484031438828, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.0595630407333374, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.03670423850417137, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03213990479707718, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.04896955192089081, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04449133574962616, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.03790385276079178, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03227422758936882, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.031158749014139175, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.02472233958542347, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.021155552938580513, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.017723940312862396, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.016816414892673492, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01235866267234087, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.009270189329981804, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.008798357099294662, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.00839338731020689, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.007816389203071594, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.006450525019317865, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006342390086501837, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005176356062293053, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004140081349760294, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.19810600578784943, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1778765320777893, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.16785742342472076, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.15006272494792938, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.09057648479938507, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.08148720115423203, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11322088539600372, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10121922194957733, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.0937429815530777, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07879993319511414, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.07517059892416, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05773875117301941, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04831235855817795, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04342930018901825, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.042263779789209366, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.028810273855924606, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.022316673770546913, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02173406071960926, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01992690935730934, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.01912878081202507, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.014814510010182858, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014098452404141426, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.012565637938678265, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008827378042042255, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.22552867233753204, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.2017495483160019, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1928141713142395, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.16817517578601837, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.1039155125617981, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0946061760187149, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12337122857570648, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.1120029091835022, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10735949873924255, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08867380768060684, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.08189660310745239, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06351988017559052, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05406031012535095, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.05040477588772774, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.0495200976729393, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03190461918711662, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.026845427230000496, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.026450028643012047, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.023907549679279327, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.02334984578192234, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.017231041565537453, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.017875853925943375, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.015803799033164978, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.01314719021320343, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.15248364210128784, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14202307164669037, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.13797104358673096, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.12474130839109421, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07161736488342285, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.06749756634235382, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.082000732421875, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07527218759059906, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07292808592319489, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06412771344184875, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06089954823255539, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.041955847293138504, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.0362565703690052, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.0346192829310894, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.034218210726976395, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.021082641556859016, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.018320269882678986, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.018140306696295738, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01686403714120388, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.016623690724372864, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011454393155872822, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.011909150518476963, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.010826429352164268, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008703060448169708, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2364945411682129, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22147566080093384, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2162860631942749, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.19556592404842377, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11130942404270172, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.1055959090590477, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12560008466243744, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11548009514808655, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11301060765981674, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09982777386903763, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09469335526227951, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06415427476167679, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.055349964648485184, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05347070097923279, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.0530255101621151, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03217560797929764, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.027716917917132378, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02751282975077629, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02541612647473812, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02513243816792965, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.017173759639263153, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017114631831645966, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01642034761607647, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01168007031083107, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.23363423347473145, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.2059624046087265, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.19476012885570526, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.1719246804714203, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10697955638170242, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.09572535008192062, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12937389314174652, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1167321428656578, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.11064736545085907, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.0901980921626091, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08542827516794205, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06603261083364487, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05613342672586441, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.05171488597989082, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.05062602460384369, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03336792811751366, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.027452079579234123, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02702508121728897, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02422822266817093, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.023518966510891914, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.018324747681617737, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.018259676173329353, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.016754871234297752, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.013188610784709454, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.10984037071466446, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10061214864253998, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09474356472492218, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08445153385400772, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05094284936785698, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04612448066473007, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06394583731889725, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05783132091164589, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.052213847637176514, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.044862836599349976, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04280366376042366, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03263109549880028, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02781178057193756, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.02468663640320301, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.023899158462882042, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01641099527478218, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.013126779347658157, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.012732623144984245, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011948351748287678, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.011471128091216087, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008788421750068665, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008920619264245033, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007587841711938381, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006380212958902121, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.09051582962274551, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08153822273015976, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07365143299102783, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06593182682991028, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.041124116629362106, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.035432592034339905, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05566690117120743, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.050919096916913986, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04257351905107498, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.036191683262586594, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.035045698285102844, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.028179336339235306, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.024288486689329147, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.01987583190202713, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.01868099719285965, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.014094970189034939, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.010431173257529736, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.009815622121095657, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.00946477148681879, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.00872021820396185, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007367385551333427, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007269043941050768, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005790869705379009, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004750768654048443, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.21638475358486176, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1949857473373413, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.18484854698181152, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.16617804765701294, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.09984812885522842, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09045877307653427, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12396381050348282, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11046279221773148, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.10331971198320389, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08717024326324463, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.08346238732337952, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06356141716241837, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.0528581403195858, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04795954003930092, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.0467587485909462, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.0318087674677372, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.024600353091955185, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02404480054974556, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.021996909752488136, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.021231913939118385, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.016445213928818703, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015388023108243942, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.014059222303330898, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009650878608226776, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.19657793641090393, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.17588292062282562, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.16869351267814636, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.14485740661621094, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09047985076904297, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08277177065610886, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10611613094806671, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09611115604639053, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.09262038767337799, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07573138922452927, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07061053812503815, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05454259365797043, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04665730148553848, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.044114239513874054, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04346887022256851, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.027637610211968422, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.02386949583888054, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.023599082604050636, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.021091442555189133, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.020717089995741844, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015461519360542297, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016246158629655838, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.014503217302262783, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012517349794507027, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.13873828947544098, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.12905415892601013, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.12516909837722778, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.11322275549173355, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.06508839130401611, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.06121338903903961, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07468993961811066, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.06868526339530945, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.06630638241767883, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.058214105665683746, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.05531943961977959, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.03821718692779541, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03304852917790413, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.031429946422576904, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03103877790272236, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.019160127267241478, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.0165717676281929, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.0163936298340559, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01522979885339737, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01499090064316988, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.010294745676219463, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.010715200565755367, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.009672684594988823, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.0077207256108522415, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.22311939299106598, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.20867246389389038, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.20350712537765503, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.18399876356124878, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.1048387885093689, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.09931130707263947, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1186538115143776, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10918927937746048, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.10655069351196289, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09401624649763107, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.08918697386980057, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.060494303703308105, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05230702832341194, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.050364911556243896, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.049902766942977905, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.030297446995973587, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.026025976985692978, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.025808850303292274, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02383267506957054, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.023536603897809982, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016025438904762268, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.015968509018421173, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.015244864858686924, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01071871817111969, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.2205604761838913, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.1932738870382309, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.18158318102359772, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.159881129860878, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.10060084611177444, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.08934714645147324, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12322656065225601, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11133188754320145, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10444425046443939, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08454827964305878, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08038011938333511, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06311510503292084, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.053711965680122375, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04877500608563423, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04755014181137085, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03201834484934807, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.0261555016040802, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.025653323158621788, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.023116543889045715, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.022326193749904633, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01776968315243721, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017833039164543152, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.016057372093200684, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.013131390325725079, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.11069192737340927, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10176190733909607, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09567546099424362, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08585433661937714, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05129002407193184, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04647838696837425, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06470432132482529, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05872417986392975, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05260608345270157, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04552391171455383, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04373821243643761, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03298822045326233, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.028117593377828598, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.02478848397731781, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02394416555762291, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.016536451876163483, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.013054732233285904, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.012625926174223423, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011920678429305553, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.011404871940612793, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008794596418738365, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008755593560636044, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007551549002528191, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006041026208549738, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.08749625086784363, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07976726442575455, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07169658690690994, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06453252583742142, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.03986541926860809, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03435856103897095, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.054920949041843414, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05033678188920021, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04114368185400963, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.035528358072042465, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03457612544298172, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.027813974767923355, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.023904867470264435, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.019259583204984665, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.01801510713994503, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.013910166919231415, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.010119915008544922, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.009457425214350224, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009264918975532055, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.008483005687594414, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007289989851415157, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007115195505321026, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005650943145155907, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004605318419635296, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.22572322189807892, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.20558595657348633, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1966344267129898, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.17671819031238556, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10472148656845093, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09598646312952042, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12723992764949799, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11375381797552109, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.1077246367931366, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09192156046628952, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.08762907236814499, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06513617187738419, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05440003052353859, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.050185225903987885, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.0491131953895092, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.032547395676374435, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.025571972131729126, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02508818730711937, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.022951802238821983, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02228699065744877, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.016673848032951355, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015626002103090286, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.014511526562273502, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009561832994222641, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.20774176716804504, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1896759420633316, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.18376007676124573, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.160283163189888, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09648532420396805, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08976452052593231, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11149216443300247, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10066387802362442, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.09831231832504272, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08288373798131943, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.0774882361292839, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05758090317249298, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.048961907625198364, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04709091782569885, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04663306847214699, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.029116153717041016, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.025589795783162117, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02538975514471531, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.023047752678394318, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.02277260273694992, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.016347836703062057, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.017422810196876526, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.015620573423802853, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.013625049963593483, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.13328304886817932, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.12417882680892944, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.12060238420963287, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.10912773013114929, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.06253856420516968, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.05886910855770111, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07146555185317993, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.06580962985754013, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.06367277354001999, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.05599517375230789, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.053154829889535904, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.036525413393974304, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03163471445441246, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.030154507607221603, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.02980121225118637, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.018319806084036827, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.015845566987991333, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.01568005420267582, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.014558069407939911, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01433781161904335, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.009837036952376366, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.010147583670914173, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.00927337259054184, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.007229826878756285, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.21829482913017273, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.20441663265228271, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.19942206144332886, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.18046383559703827, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.10265990346670151, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.09729171544313431, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1161612793803215, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10686062276363373, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.10432685911655426, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0921819731593132, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.08750458061695099, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.059225983917713165, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.0512283518910408, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04934963583946228, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04891163110733032, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.029727261513471603, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02562352828681469, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.025424402207136154, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.023525770753622055, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02324855327606201, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.015939751639962196, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.015905654057860374, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.015201673842966557, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010911213234066963, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.20769579708576202, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.18108783662319183, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.16871394217014313, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.14806844294071198, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09467904269695282, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.0830240547657013, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11759473383426666, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10642362385988235, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.0983734279870987, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07900761067867279, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.0752839520573616, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.060246147215366364, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05159220099449158, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.046064767986536026, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04471554979681969, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.030726466327905655, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.025027092546224594, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.024428995326161385, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.022115647792816162, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02121727168560028, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017351148650050163, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017527563497424126, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.015518426895141602, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.013143223710358143, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.10972868651151657, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10055844485759735, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09326624125242233, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08419190347194672, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05071139708161354, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04520731046795845, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.0664263665676117, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05994617938995361, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05209892615675926, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04515477642416954, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04368535429239273, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.0338921956717968, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02871500700712204, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.02458643540740013, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.023526543751358986, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.017046356573700905, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.013102221302688122, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.012548929080367088, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012026552110910416, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.011368541978299618, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009151959791779518, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.00909536425024271, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007603024132549763, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.00639007706195116, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.08743523806333542, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07882841676473618, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06922952830791473, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06252843886613846, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.03957861661911011, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03302120789885521, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.057201702147722244, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05193246901035309, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04109501838684082, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03522234782576561, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.034595273435115814, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.02886105701327324, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.024706196039915085, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.01915550045669079, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.017631135880947113, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.014444449916481972, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.010128414258360863, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.009304731152951717, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.00928233191370964, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.008311624638736248, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007568025961518288, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007349810097366571, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005584780126810074, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004739547614008188, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.22420050203800201, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.20064744353294373, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.19005730748176575, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.17074447870254517, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.1025763750076294, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09257720410823822, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12765273451805115, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11394145339727402, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.10641133785247803, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08953333646059036, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.08560604602098465, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06557586789131165, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05440312251448631, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.049267422407865524, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04795241728425026, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03281882405281067, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.025204673409461975, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.024598009884357452, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02251458913087845, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.021679384633898735, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0168303232640028, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015761610120534897, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.01420518383383751, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009790049865841866, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.22751909494400024, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.19800980389118195, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.18723106384277344, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.15832170844078064, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.10502175241708755, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.09254427254199982, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12443751096725464, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11337623000144958, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10803736746311188, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.0844816341996193, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07809002697467804, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.0640023723244667, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.054792921990156174, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.050916269421577454, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04999151825904846, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.032269902527332306, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.027202043682336807, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.026744799688458443, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02327599562704563, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.022658096626400948, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01748478412628174, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01821727119386196, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.01601429469883442, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.01345259789377451, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.1386604607105255, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.12974011898040771, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.12639707326889038, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.11443132162094116, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.06516426056623459, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.061697207391262054, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07379721105098724, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.06806806474924088, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.06624974310398102, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.05850793421268463, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.055510587990283966, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.03767181932926178, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03266896679997444, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.031371910125017166, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.031066004186868668, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.01887274719774723, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.016387799754738808, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.01624211296439171, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.0150635140016675, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.014872942119836807, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.010073867626488209, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.010326546616852283, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.009579452686011791, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.007239237893372774, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.22707439959049225, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21311894059181213, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.20829005539417267, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.18853716552257538, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.10672760009765625, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.1014929786324501, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12008152157068253, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11051914095878601, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.10833022743463516, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09595751017332077, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09111970663070679, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06123355031013489, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05290158465504646, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05121505632996559, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05081593990325928, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03070668876171112, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.026382900774478912, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.026193371042609215, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02419378235936165, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.023941466584801674, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01636006310582161, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016018573194742203, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.015685226768255234, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010632060468196869, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.20593397319316864, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.18023915588855743, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.16915284097194672, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.14904239773750305, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09383578598499298, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.08302582055330276, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11408421397209167, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10382905602455139, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.09729517996311188, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07847090065479279, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.07462047040462494, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.058350879698991776, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.049956802278757095, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04535924270749092, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.0442001074552536, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.029551226645708084, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.024110103026032448, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.023655584082007408, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.021167781203985214, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.020435107871890068, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.0162302665412426, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016186945140361786, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.014660760760307312, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011667612008750439, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.10589924454689026, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09635423868894577, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08909980207681656, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.080376036465168, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.048833880573511124, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04320821538567543, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.0638241171836853, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05790048465132713, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05030602589249611, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.043260619044303894, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.041887544095516205, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.032557494938373566, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.027722053229808807, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.02363814413547516, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.022582000121474266, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.016351018100976944, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.012528788298368454, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.01200038380920887, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01145222969353199, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.010811813175678253, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008727394044399261, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008653429336845875, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007226007059216499, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0059968410059809685, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.088678278028965, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.0796767994761467, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07035190612077713, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06356040388345718, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04024422913789749, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.033690936863422394, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05746110528707504, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05207356810569763, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04183872789144516, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.035659633576869965, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03512115404009819, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0291854627430439, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.024886388331651688, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.01947653852403164, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.018011558800935745, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.014539164490997791, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.010288256220519543, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.009534320794045925, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.00940253771841526, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.008495335467159748, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0076115247793495655, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0074238539673388, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005653075408190489, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004870167933404446, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.21464040875434875, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1901739090681076, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1779894232749939, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.15959814190864563, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.0974867194890976, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.0864310935139656, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12477350234985352, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11089629679918289, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.10163556039333344, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08444739878177643, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.08108916133642197, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06427080929279327, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05296500399708748, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.04673760384321213, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.045188937336206436, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03217164799571037, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.023954376578330994, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02321101725101471, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.021302487701177597, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.020282335579395294, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.016497891396284103, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015239275991916656, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.01342829130589962, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009378115646541119, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.22767849266529083, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.19729478657245636, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.18708465993404388, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.15774236619472504, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.1047411859035492, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.09212718158960342, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12128613889217377, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11164462566375732, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10787408798933029, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08319753408432007, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07657631486654282, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.062266379594802856, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.053624704480171204, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.0504591166973114, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04970109835267067, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.031209709122776985, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.026446780189871788, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.026116659864783287, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02220393344759941, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.02169577032327652, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.016690950840711594, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016938861459493637, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.015393389388918877, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011876133270561695, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.13647142052650452, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.12791593372821808, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1248081848025322, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.11303679645061493, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.06419917196035385, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.06089422106742859, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0723947137594223, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.0667726919054985, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.06518852710723877, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.05768819525837898, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.05476892367005348, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.03695284575223923, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.0320403128862381, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.0308828242123127, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.030607210472226143, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.01852925680577755, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.016095899045467377, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.015974408015608788, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.014803546480834484, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01463339477777481, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.009928287006914616, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01008324883878231, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.009489274583756924, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.00703205494210124, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.23295031487941742, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21892313659191132, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2142806053161621, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.19400852918624878, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.10962499678134918, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10431244969367981, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1226535215973854, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11311949789524078, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11114688962697983, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09861335903406143, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09348597377538681, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06251463294029236, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05413565784692764, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05254625156521797, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05217398703098297, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03128578141331673, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.027010271325707436, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.026839852333068848, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02477770484983921, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02453915774822235, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016548149287700653, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016295170411467552, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01591671071946621, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010732085444033146, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.20341284573078156, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.17799702286720276, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.16673816740512848, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.14795134961605072, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.0922609344124794, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.0815228596329689, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11284370720386505, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10283255577087402, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.09585972875356674, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07778551429510117, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.07406800985336304, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05731488764286041, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.04938657581806183, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.044586263597011566, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04337998479604721, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.029053615406155586, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.023643190041184425, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.023153288289904594, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02085724100470543, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02007271535694599, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01598035730421543, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.015855109319090843, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.014364033006131649, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011284193955361843, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.10698582231998444, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09679349511861801, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08923786133527756, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08053875714540482, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.049128804355859756, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04329366609454155, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.0644892007112503, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05864136293530464, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05087721720337868, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04346885904669762, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04219679906964302, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03295397758483887, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.028089171275496483, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.02381487376987934, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.02270796149969101, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01657790131866932, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.012657571583986282, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.012106785550713539, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01155626866966486, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.01087760180234909, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008873127400875092, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008800497278571129, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007308194413781166, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006117971148341894, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.09569955617189407, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08588102459907532, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07588887959718704, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06858405470848083, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04345211386680603, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03640821948647499, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06183486059308052, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.056169670075178146, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.045202214270830154, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03848059102892876, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03777390718460083, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.031422801315784454, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.026845866814255714, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.021091105416417122, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.01949721947312355, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01568935066461563, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.011193911544978619, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.010360758751630783, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01023357268422842, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.009248211979866028, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.00825263187289238, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.00811802875250578, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.006198285613209009, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005381064955145121, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.212411567568779, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.18669770658016205, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.17354004085063934, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.15569669008255005, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.09594909846782684, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.08416236191987991, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12475064396858215, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11057311296463013, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.10035606473684311, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08279216289520264, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.07969943434000015, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06378085166215897, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05282155051827431, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.046057965606451035, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04431450366973877, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03189624473452568, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.023644201457500458, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02282842993736267, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.020988784730434418, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.019860785454511642, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.016324112191796303, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.01524739246815443, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.013100391253829002, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009433653205633163, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.21415497362613678, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.18669076263904572, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.17574167251586914, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.14161667227745056, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09949443489313126, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.0881006196141243, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11839111149311066, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10817093402147293, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10220624506473541, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07809785008430481, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.06867959350347519, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06057766452431679, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05221419036388397, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.0483466200530529, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04738203063607216, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.030469216406345367, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.025870129466056824, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.025422438979148865, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.021817967295646667, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.02120879292488098, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.016577797010540962, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.017437893897294998, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.015139468014240265, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012969679199159145, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.14858309924602509, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.13930149376392365, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.13596290349960327, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.12315301597118378, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.06979628652334213, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.06623079627752304, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07854939252138138, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07252713292837143, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07085917890071869, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06274735927581787, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.05950075015425682, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.040031980723142624, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.034744568169116974, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03351987153291702, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03322583809494972, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.020020177587866783, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.017362495884299278, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.017223475500941277, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.015941159799695015, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.015760987997055054, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.010580279864370823, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.010707822628319263, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.010104555636644363, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.007282296195626259, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.23685207962989807, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22259214520454407, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2178853154182434, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.19729238748550415, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11137749999761581, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10605732351541519, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12435808777809143, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.1148795634508133, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11291471123695374, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.1002635508775711, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09497108310461044, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06339443475008011, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.054935019463300705, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.053341079503297806, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.0529620535671711, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03164980933070183, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02733078971505165, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02715899981558323, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02506038174033165, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.024818500503897667, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016553496941924095, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016366440802812576, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.015912288799881935, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010606678202748299, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.2086455523967743, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.1839907467365265, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.17309215664863586, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.15390311181545258, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09505090117454529, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.08456729352474213, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11587493121623993, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1052788570523262, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.09841396659612656, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08053657412528992, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.07683239877223969, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.058833491057157516, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05050337687134743, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.0458720438182354, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04470900446176529, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.029771408066153526, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.024233074858784676, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02375376969575882, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02142656221985817, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02068261057138443, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.016350368037819862, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016091005876660347, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.014782939106225967, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011380723677575588, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.10819237679243088, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09841948747634888, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09141869843006134, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08251725137233734, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.04981124773621559, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04428042471408844, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06456159800291061, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05858222395181656, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.051419880241155624, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04414310306310654, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04270722344517708, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.032910674810409546, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02801566570997238, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.024069769307971, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.023048758506774902, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.016498006880283356, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.012652778066694736, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.012131890282034874, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011539590544998646, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.010900106281042099, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008742772042751312, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008586261421442032, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.0072669233195483685, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005793425720185041, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.09514984488487244, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08528173714876175, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07673434168100357, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06928534805774689, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04316410794854164, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.036893583834171295, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05916339159011841, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.053969856351614, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.044927023351192474, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03813503682613373, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03713501989841461, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03003459796309471, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02574928291141987, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.020881926640868187, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.019572429358959198, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.015038159675896168, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.010966009460389614, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.01029624231159687, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009967289865016937, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.009159009903669357, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007850293070077896, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.00768534978851676, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.006086054723709822, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005022624507546425, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.22772634029388428, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.2046179473400116, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.19370566308498383, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.17412178218364716, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10428119450807571, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09413498640060425, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12858806550502777, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.1159578487277031, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.10796348750591278, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0911024883389473, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.08709965646266937, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06565796583890915, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.055336061865091324, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.05003276467323303, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.048655737191438675, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.032828424125909805, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.025590822100639343, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.024948477745056152, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.022893249988555908, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02202056348323822, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01680973544716835, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0159856416285038, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.014344600029289722, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009860307909548283, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.20251508057117462, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.17413455247879028, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.16455617547035217, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.13419005274772644, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09224346280097961, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08127708733081818, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10845284163951874, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09923414885997772, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.09576902538537979, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07254500687122345, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.06513837724924088, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.055474646389484406, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.047719478607177734, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04453251510858536, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04375988990068436, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.027794314548373222, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.02337493561208248, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02305244281888008, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.019535908475518227, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.019007978960871696, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.014895869418978691, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.015138564631342888, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.01361888088285923, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.01066809706389904, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.15348178148269653, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14393845200538635, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14055344462394714, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.12724797427654266, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07218638062477112, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.06849726289510727, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08137252181768417, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.0749148279428482, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07325061410665512, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06484003365039825, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.061604127287864685, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04148135706782341, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.035911232233047485, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03467687964439392, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.034381572157144547, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02081841602921486, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.01800484023988247, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.0178704671561718, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.016535555943846703, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.016353921964764595, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011197094805538654, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.011158181354403496, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.010731502436101437, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.007661744952201843, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.24050374329090118, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22597511112689972, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.22111932933330536, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.20019987225532532, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11315974593162537, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10770636051893234, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12647069990634918, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11672629415988922, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11478015780448914, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10173768550157547, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.0963832437992096, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06452218443155289, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.055837277323007584, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05424102768301964, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05385919660329819, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.032241735607385635, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02780538983643055, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02762882225215435, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.025472618639469147, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02522825263440609, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016987748444080353, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01667056605219841, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01633739098906517, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01084105484187603, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.21293148398399353, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.18690818548202515, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.17536409199237823, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.15638618171215057, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09679663181304932, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.08571892976760864, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11754954606294632, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10761329531669617, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.1003483310341835, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08190588653087616, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.07822230458259583, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.059947576373815536, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.051601361483335495, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.046681396663188934, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.045462824404239655, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03020327538251877, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.024606047198176384, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02411235310137272, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.021716097369790077, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02093425765633583, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.016318047419190407, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016304276883602142, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.014619878493249416, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.01143783051520586, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.11624139547348022, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10618889331817627, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09870688617229462, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08919870853424072, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05379912257194519, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04784756153821945, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06924105435609818, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06321104615926743, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05529072880744934, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.047754012048244476, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.046106692403554916, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03529748320579529, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.030289391055703163, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.026028022170066833, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.024929672479629517, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01767890714108944, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.013742000795900822, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.01317804865539074, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012560679577291012, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.011883941479027271, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009320042096078396, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009384999983012676, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.0077642882242798805, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006438521668314934, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.10221188515424728, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.0927220806479454, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08320428431034088, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.07527461647987366, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04679017886519432, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03999101370573044, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06477238237857819, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05907285213470459, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04839862510561943, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04164176061749458, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.04063935950398445, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03283566236495972, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.028255419805645943, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.022645197808742523, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.02112807147204876, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.016474829986691475, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.011914372444152832, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.01111574750393629, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.010897497646510601, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.009955953806638718, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.008583078160881996, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008425130508840084, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.006598229054361582, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005485907196998596, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.2348799854516983, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.21335265040397644, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.2041153609752655, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.18357239663600922, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.1085451990365982, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09931724518537521, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1311047524213791, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11835160106420517, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.11176947504281998, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09545551240444183, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.09091955423355103, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06680139899253845, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05656848102807999, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.05202525854110718, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.05092030018568039, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03336944058537483, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.02658217027783394, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02604866214096546, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.023881714791059494, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.023158619180321693, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.017101803794503212, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.016340751200914383, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.01497834362089634, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010054280050098896, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.1804867684841156, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.16041654348373413, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1503707617521286, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.12552720308303833, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.08398870378732681, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.07414107769727707, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10246924310922623, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09391874819993973, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.08578325062990189, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.06845413148403168, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.062076929956674576, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05244319513440132, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.0450701080262661, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.040511783212423325, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.039377111941576004, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.02624792605638504, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.02121441438794136, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.0205815602093935, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.018162840977311134, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.01738414354622364, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.013883187435567379, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.013920679688453674, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.012178104370832443, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.009472851641476154, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.15421676635742188, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14455077052116394, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14116422832012177, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.12774410843849182, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07254186272621155, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.06885018944740295, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08151616156101227, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07524870336055756, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07364269345998764, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06512323766946793, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06179136410355568, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.041593436151742935, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.036074504256248474, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03484317287802696, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.034548070281744, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.020818419754505157, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.01806124858558178, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.01792784035205841, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.016573000699281693, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.016388311982154846, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011064535938203335, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.011153239756822586, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.010591620579361916, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.007610148284584284, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.24319128692150116, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2284177988767624, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.22356517612934113, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.2024359405040741, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.1144600361585617, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.1089683473110199, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12831681966781616, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11801879853010178, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11609745025634766, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10289154946804047, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09763500094413757, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06529786437749863, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05650614574551582, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.054893895983695984, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05451831594109535, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.032738979905843735, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.028211617842316628, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.02803630195558071, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.025852171704173088, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.02560918591916561, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.0174054317176342, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01700720004737377, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.016758959740400314, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01118969451636076, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.21616296470165253, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.1901804655790329, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.17891159653663635, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.159550741314888, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09826798737049103, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.08739694207906723, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11925488710403442, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10885094106197357, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.1019306406378746, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08341795951128006, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.07952605932950974, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.060419563204050064, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.052132248878479004, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04732826352119446, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04612541198730469, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.030487019568681717, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.024824073538184166, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.024345217272639275, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.021923186257481575, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02115591987967491, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.016536159440875053, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016267260536551476, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.014913482591509819, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011253255419433117, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.12287140637636185, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.11275459080934525, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.10539548844099045, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.09505496174097061, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.0569598563015461, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.05114218592643738, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07273328304290771, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06617095321416855, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05846934765577316, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.05061320215463638, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04884811490774155, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03702262043952942, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.031667400151491165, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.027506651356816292, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.026435336098074913, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01854746975004673, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.014382457360625267, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.013824938796460629, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.013121780939400196, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.012449607253074646, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009727914817631245, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009611179120838642, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.008166668005287647, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0063974629156291485, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.1030523031949997, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09378354251384735, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08476968109607697, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.07665864378213882, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04731617122888565, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.04072793573141098, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06440460681915283, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05897406488656998, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04878940060734749, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04208008944988251, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.040948837995529175, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03257300332188606, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02820703014731407, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.022812295705080032, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.021377287805080414, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.016289247199892998, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.011912014335393906, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.01114942878484726, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.010877068154513836, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.009972508065402508, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.008456696756184101, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008272916078567505, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.006568130571395159, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005260961130261421, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.2366800159215927, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.21568290889263153, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.20636892318725586, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.1857164353132248, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10938598215579987, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.10033595561981201, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.13272859156131744, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11922301352024078, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.11247315257787704, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09631886333227158, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.09176814556121826, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06771169602870941, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05691235139966011, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.05242184177041054, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.05130723491311073, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.033778972923755646, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.02673954889178276, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.026214420795440674, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.024066226556897163, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.023338787257671356, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01734992116689682, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.01640387251973152, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.015156695619225502, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010028142482042313, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.1935252845287323, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.16877113282680511, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.15934573113918304, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.12902531027793884, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.08846656233072281, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.07868020236492157, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1074700653553009, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09686390310525894, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.09238920360803604, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07116895914077759, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.062807597219944, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.0546286515891552, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04651982709765434, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.042583711445331573, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.041608843952417374, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.027454668655991554, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.022164221853017807, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02172313816845417, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.018731288611888885, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.018065502867102623, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.014681835658848286, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.014239758253097534, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.013033613562583923, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.009585585445165634, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.15688158571720123, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14704807102680206, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14353276789188385, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1298951953649521, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.0738847404718399, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07008014619350433, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08310806006193161, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07666155695915222, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07500478625297546, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06626579165458679, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06284824013710022, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04244983196258545, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03676846623420715, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03550971299409866, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03521525859832764, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.021281538531184196, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.018464718014001846, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.018325384706258774, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.016940219327807426, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.016758527606725693, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01140903402119875, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.011476087383925915, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.010931573808193207, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.007921582087874413, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.24516461789608002, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.23023416101932526, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.22533227503299713, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.20383521914482117, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11553435027599335, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10993021726608276, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12878400087356567, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11909520626068115, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.1171741783618927, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10374334454536438, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09814643114805222, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06560482084751129, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05695759505033493, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.055345937609672546, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.0549667589366436, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.032757602632045746, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.028328441083431244, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.028158225119113922, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02590818889439106, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.025661999359726906, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.0170401893556118, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016898082569241524, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.016377603635191917, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010888775810599327, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.21682021021842957, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.19038529694080353, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.17841321229934692, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.1595439612865448, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09829267114400864, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.08699219673871994, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11981489509344101, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10986857116222382, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.1021382063627243, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08368942886590958, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.07993602007627487, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.061049800366163254, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05259180814027786, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.0473114512860775, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04600125551223755, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.030645592138171196, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02477389946579933, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.024226030334830284, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.021906530484557152, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.021046770736575127, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.016439342871308327, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016259513795375824, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.014638559892773628, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.01110030710697174, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.115836501121521, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10572157055139542, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09807895869016647, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.0883859246969223, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05358542874455452, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04758956655859947, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06940702348947525, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06323645263910294, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05517685040831566, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04750051721930504, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04590315744280815, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.035326212644577026, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.03032361902296543, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.02589726820588112, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.024767950177192688, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.017706923186779022, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.013632137328386307, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.013036916963756084, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012438810430467129, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.011721246875822544, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009248476475477219, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009286871179938316, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007600622251629829, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.00626024603843689, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.09859829396009445, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08907461911439896, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07915766537189484, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.07152562588453293, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04497192054986954, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03801022469997406, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06341752409934998, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.057811081409454346, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04663209244608879, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.039918411523103714, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.039102282375097275, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.032127682119607925, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.027523953467607498, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.021769236773252487, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.020199354737997055, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01605198159813881, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.011463570408523083, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.01062747836112976, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0104698296636343, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.009480491280555725, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.00842316448688507, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008174631744623184, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0063874563202261925, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005283653736114502, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.2277953028678894, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.20399583876132965, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1922001838684082, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.17256364226341248, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10410185903310776, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.0934617891907692, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.13085119426250458, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.1170939952135086, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.10805251449346542, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09080296009778976, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.0870000422000885, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06703800708055496, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05585259944200516, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.049947578459978104, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04846334457397461, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.033492911607027054, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.025580650195479393, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.024845853447914124, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02286052331328392, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02189376950263977, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01721915975213051, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.016120553016662598, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.014365555718541145, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00991771835833788, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.19501517713069916, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.17393840849399567, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.16751131415367126, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.1383778601884842, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09105335175991058, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08311356604099274, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10302577167749405, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.0947640910744667, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.09298736602067947, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07316538691520691, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.06630616635084152, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05269003286957741, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.045435234904289246, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.04372885450720787, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.043326739221811295, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.026330070570111275, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.02256305329501629, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.022371143102645874, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01895853318274021, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.018674666061997414, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.013841032050549984, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.013812389224767685, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.013118106871843338, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0092068612575531, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.16163334250450134, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15144231915473938, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14785924553871155, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1336791217327118, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07617221027612686, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.07222387939691544, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08561693131923676, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07904595136642456, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07733181864023209, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06827464699745178, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06465400755405426, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04366939887404442, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03788600489497185, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.036584921181201935, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03628070652484894, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02183721587061882, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.018929345533251762, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.018786394968628883, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.0173330195248127, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.017141085118055344, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011529894545674324, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01163945160806179, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011018612422049046, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.007880072109401226, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.2451065480709076, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.23011183738708496, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.22519321739673615, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.20361828804016113, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11558843404054642, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10993411391973495, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12924818694591522, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11920559406280518, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.11723794788122177, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10371629893779755, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09813554584980011, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06589030474424362, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05705978721380234, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.05542927235364914, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05503436550498009, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03294112905859947, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02843322977423668, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.028254006057977676, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.025996772572398186, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.025753922760486603, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.017341863363981247, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017059633508324623, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01667437143623829, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01112238597124815, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.2188064157962799, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.19258174300193787, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.1807120144367218, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.16135355830192566, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09934838861227036, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.08816652745008469, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12113673985004425, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11095188558101654, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10315815359354019, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08475411683320999, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08090818673372269, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06173183023929596, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05323682352900505, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04792366921901703, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04659034311771393, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.031235577538609505, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02524501085281372, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.024701649323105812, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.022404856979846954, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.02155890315771103, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01704913005232811, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016752207651734352, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.01527282316237688, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011696341447532177, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.11977814137935638, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10936165601015091, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.10276969522237778, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.09278162568807602, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.0555104985833168, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.05003137141466141, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06972436606884003, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06350421160459518, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05711473524570465, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.0492016077041626, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.047223351895809174, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03555822744965553, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.03044133260846138, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.026818357408046722, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.025890594348311424, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01783127896487713, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.014066653326153755, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.013608856126666069, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012824013829231262, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.012250986881554127, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009383084252476692, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.00937789585441351, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007972200401127338, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006380271166563034, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.10127098113298416, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.09261356294155121, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.08607818186283112, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.0775790885090828, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04675031453371048, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.04163483530282974, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06020013988018036, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05501839891076088, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04808194190263748, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.04154159501194954, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.0399022176861763, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03050137683749199, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.026306044310331345, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.022541964426636696, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.02157626301050186, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.015261978842318058, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.0117565942928195, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.011250767856836319, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.010704120621085167, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.010103670880198479, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007986395619809628, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007855416275560856, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.006610345561057329, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005134785547852516, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.22034041583538055, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.20100493729114532, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.19231221079826355, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.17301775515079498, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10209913551807404, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09369397163391113, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1220756396651268, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11127244681119919, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.10514268279075623, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08992312103509903, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.08552935719490051, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.062149807810783386, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05313308537006378, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.048923514783382416, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04789632186293602, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.031031077727675438, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.02494194358587265, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.02446097508072853, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.022418653592467308, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.021740157157182693, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.015888463705778122, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015239759348332882, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.014091328717768192, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009262263774871826, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.19954219460487366, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1752150058746338, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1670997589826584, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.13949918746948242, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09288276731967926, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08364760875701904, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10698036849498749, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09823819249868393, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.09541082382202148, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07455912232398987, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.06717701256275177, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.054916467517614365, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04736912250518799, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.0449582114815712, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04441187530755997, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.027653498575091362, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.023853681981563568, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.023598428815603256, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02029993012547493, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.019930656999349594, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015044832602143288, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.015597090125083923, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.01409109216183424, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.01143249124288559, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.1565065234899521, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1463816910982132, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14276933670043945, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1290198117494583, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07377251982688904, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.0698477178812027, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08315983414649963, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07681205868721008, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07496808469295502, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06602755188941956, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06253623217344284, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04249773547053337, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.036848582327365875, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.035490915179252625, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03516482561826706, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02126546949148178, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.018419696018099785, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.018270334228873253, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.016863448545336723, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.01666148193180561, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011281421408057213, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.011429795995354652, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.010755815543234348, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.007831700146198273, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.23504704236984253, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22049082815647125, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.21563802659511566, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.19490116834640503, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.11094881594181061, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.10541017353534698, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12403086572885513, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11456212401390076, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.1125318855047226, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09943445771932602, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.09408215433359146, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06327053904533386, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05484898388385773, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.0531965009868145, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.05281177535653114, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03163408860564232, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02732202783226967, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.027139414101839066, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02496195212006569, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.024714302271604538, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016608452424407005, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016434526070952415, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.015933603048324585, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010755226947367191, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.21765828132629395, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.191016286611557, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.17846234142780304, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.15950487554073334, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09878044575452805, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.08714636415243149, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12239763885736465, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11122733354568481, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.10273867100477219, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08408860862255096, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.08037424832582474, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.062221549451351166, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.053283438086509705, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04765567556023598, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.046242695301771164, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03141067922115326, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02510632388293743, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.024515226483345032, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02224791795015335, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.021335674449801445, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017087310552597046, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016718612983822823, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.015138277783989906, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011621535755693913, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.11058603972196579, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10093049705028534, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09258972853422165, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.0835568979382515, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05116237327456474, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.044869184494018555, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06855642795562744, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06188489496707916, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05268973112106323, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.045417070388793945, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04425540193915367, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03499510884284973, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.029631925746798515, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.024801000952720642, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.023519227281212807, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.017531149089336395, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.01311219297349453, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.012451318092644215, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011987181380391121, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.011195910163223743, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.00923366378992796, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009117835201323032, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.0074084303341805935, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0061866906471550465, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.0922762081027031, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08322522789239883, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07204140722751617, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06526012718677521, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04190431535243988, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03433888405561447, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06199391558766365, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05631579831242561, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04354763776063919, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.037343479692935944, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.03688400611281395, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03130972385406494, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02678784355521202, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.020349882543087006, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.018501749262213707, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.015681495890021324, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.01074058748781681, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.009768741205334663, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009856507182121277, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.00870638620108366, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.008185235783457756, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007915199734270573, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.005888177081942558, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005043762736022472, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.22459128499031067, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.20052263140678406, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1878606677055359, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.168624147772789, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10278426855802536, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.0913788303732872, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.13073505461215973, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11700174957513809, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.10680215060710907, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08947654813528061, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.08587070554494858, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06713172793388367, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05604502558708191, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.049358148127794266, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04768707975745201, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03371012955904007, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.025469208136200905, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.024652304127812386, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.022782376036047935, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02169875241816044, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01732637546956539, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.016421504318714142, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.014240722171962261, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010399593971669674, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.2147180587053299, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.18289247155189514, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.17242398858070374, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.1419479250907898, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.09821050614118576, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.08640848845243454, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11557142436504364, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.1056339368224144, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.10190737992525101, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07584815472364426, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.07031846046447754, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.059148795902729034, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.050729699432849884, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.047387175261974335, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.046565763652324677, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.029660597443580627, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.024890149012207985, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.024521570652723312, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.020523449406027794, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.01996392384171486, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015892401337623596, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016094954684376717, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.014571599662303925, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011331923305988312, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.1463376134634018, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.13665521144866943, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.13302384316921234, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.11996988952159882, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.0689588263630867, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.06509221345186234, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0781799778342247, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07212605327367783, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07015442103147507, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.061616308987140656, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.058333296328783035, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.039910100400447845, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.034623321145772934, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03318272531032562, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.032838720828294754, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.019976824522018433, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.017234250903129578, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.01707419566810131, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.015761984512209892, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.015547585673630238, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.010579511523246765, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.010742641054093838, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.010022333823144436, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.007367575075477362, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.21735115349292755, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.20355917513370514, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.19878584146499634, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.17944015562534332, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.10255326330661774, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.09722065925598145, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11515532433986664, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10632628947496414, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.10413679480552673, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09177737683057785, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.08681958168745041, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05880372226238251, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.0509442538022995, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04925280809402466, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04884390905499458, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.029421178624033928, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02543390914797783, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.025251900777220726, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.023251280188560486, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.022996364161372185, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.015553809702396393, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01555755827575922, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.014867596328258514, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010435120202600956, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.19791442155838013, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.1724085509777069, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.16014443337917328, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.1423783302307129, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.08989259600639343, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.0785689651966095, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11157535016536713, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10192922502756119, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.09357714653015137, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07569494843482971, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.07232645899057388, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05708801746368408, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.04909267649054527, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04358596354722977, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.04220624640583992, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.028964687138795853, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.02336471900343895, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02278304100036621, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.020677128806710243, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.01979050599038601, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.016034908592700958, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01607915386557579, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.014189280569553375, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011692618019878864, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.1099839061498642, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1002511978149414, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09252044558525085, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.08349940180778503, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.05060674995183945, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04477624595165253, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06805931031703949, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.060594718903303146, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05232428014278412, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04508848115801811, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04377523064613342, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03477190062403679, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.029014715924859047, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.024469003081321716, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.023283593356609344, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.017419656738638878, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.012880627065896988, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.012284727767109871, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011795878410339355, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.011060012504458427, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009138357825577259, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008847939781844616, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007289284840226173, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005943118594586849, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.09284748136997223, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08507049083709717, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07677552103996277, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06934523582458496, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04260050877928734, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03683333843946457, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.059161387383937836, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05364289507269859, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.04392511025071144, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03811747580766678, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.037242792546749115, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0300472192466259, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02549723908305168, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.020615605637431145, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.019280966371297836, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01502153929322958, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.010837141424417496, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.010132919065654278, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009960069321095943, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.009129752404987812, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.00785733200609684, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0076293968595564365, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.006047999951988459, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004962887614965439, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.2440817654132843, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.2237946093082428, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.2144525647163391, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.19320376217365265, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.11347613483667374, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.10462798923254013, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.13670004904270172, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12378465384244919, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.1167343333363533, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.10050836205482483, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.09592747688293457, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.07000505179166794, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05927569046616554, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.054509237408638, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.053366050124168396, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03508009389042854, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.027919316664338112, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.027368534356355667, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.025257950648665428, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02450193651020527, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.018047083169221878, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.01727866567671299, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.015817441046237946, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010817133821547031, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.19378076493740082, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.16340325772762299, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.15191447734832764, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.1315000057220459, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.08782536536455154, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.07513931393623352, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10826439410448074, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09712237864732742, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.09242414683103561, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07036730647087097, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.0664052963256836, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.0556076280772686, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.046721309423446655, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.0424669124186039, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.04140712320804596, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.027883736416697502, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.022422675043344498, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.021968916058540344, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.019045710563659668, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.018338516354560852, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.014881106093525887, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01488988846540451, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.013062048703432083, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.010540680028498173, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.14688965678215027, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.13696496188640594, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.13316583633422852, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.12017136812210083, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.06920379400253296, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.06523469090461731, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07859697937965393, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07252342998981476, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07041230797767639, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0617540143430233, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.05845574289560318, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.040185026824474335, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03484804928302765, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03334219753742218, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.032979123294353485, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.020122036337852478, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.01741209253668785, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.01724116876721382, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.015931114554405212, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.015701301395893097, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.010727534070611, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.011000123806297779, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.010147497989237309, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.007691284641623497, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.19484496116638184, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.18218161165714264, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1777070164680481, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.1605137586593628, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.09259376674890518, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.08768198639154434, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10431402921676636, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09621496498584747, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.09407298266887665, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0828661322593689, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.07850818336009979, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.053818780928850174, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.047018278390169144, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.04538603872060776, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.04501398280262947, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.027118079364299774, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.02502492070198059, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.024859003722667694, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.023249579593539238, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.023024216294288635, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.015263354405760765, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01741226762533188, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01466925349086523, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.014005102217197418, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.20326420664787292, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.17870324850082397, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.16784390807151794, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.14811286330223083, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.09277607500553131, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.0823347195982933, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11321542412042618, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1028890535235405, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.09599296003580093, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07809051871299744, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.07390493154525757, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.057450611144304276, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.049342211335897446, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.04477272555232048, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.0436292439699173, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.028842363506555557, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.0236456748098135, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.023168006911873817, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.020811384543776512, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.020073026418685913, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.015525024384260178, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01570248417556286, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.01394063699990511, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011086788959801197, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.10738027840852737, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09744800627231598, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08834647387266159, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.07964441180229187, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.049461349844932556, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.042701393365859985, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06796197593212128, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.06114042550325394, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.05113532021641731, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04378144070506096, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04282741621136665, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.034707292914390564, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.029337221756577492, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.02402503974735737, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.022611862048506737, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.017412202432751656, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.012789050117135048, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.012060937471687794, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011690106242895126, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.010814369656145573, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009217938408255577, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009092123247683048, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.007216623518615961, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006227383390069008, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.09343623369932175, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.0843484178185463, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07285860925912857, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06570488959550858, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.0424746498465538, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.034692198038101196, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06380729377269745, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05730936676263809, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.044212087988853455, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03783685341477394, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.037388019263744354, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03243028372526169, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02737581543624401, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.02067255973815918, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.018760021775960922, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.016249271109700203, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.010987645015120506, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.009993120096623898, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.010089530609548092, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.008916824124753475, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.00848429650068283, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008184653706848621, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.00597103638574481, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005350899882614613, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.22151266038417816, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1964537352323532, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.18281427025794983, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.16382917761802673, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10076481848955154, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.08872820436954498, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1306838095188141, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11668607592582703, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.10507212579250336, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08745139092206955, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.08439332991838455, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0673912912607193, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05588524043560028, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.048506900668144226, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.046616584062576294, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03385314717888832, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.025144319981336594, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.024204256013035774, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02248755842447281, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.021239198744297028, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01755439303815365, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.016537923365831375, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.014080939814448357, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010584202595055103, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.1707805097103119, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.15180814266204834, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.14607641100883484, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.12197425961494446, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.079169362783432, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.07219121605157852, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.09051025658845901, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.08219337463378906, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.08083779364824295, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.06393942981958389, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.05879652500152588, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.046517256647348404, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.03977528214454651, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.038379862904548645, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.0380486324429512, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.02338944748044014, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.020517777651548386, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.02037370577454567, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.017623065039515495, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.01739945076406002, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.012879305519163609, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.013539031147956848, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.012316263280808926, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.010198423638939857, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.14767026901245117, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.13778524100780487, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.13412019610404968, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.12112760543823242, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07007667422294617, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.06612487137317657, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07940924912691116, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07319111377000809, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07118497788906097, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06260465085506439, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.05938813462853432, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0410630889236927, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03578200563788414, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03439704701304436, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03406120091676712, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.020716898143291473, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.018942173570394516, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.018788008019328117, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.017562827095389366, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.017371663823723793, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011689378879964352, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013178888708353043, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.011181783862411976, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010563448071479797, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.15098850429058075, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14114192128181458, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.13761451840400696, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.12424298375844955, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.07168415188789368, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.06780296564102173, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08084028214216232, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07449180632829666, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.07279670983552933, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06408988684415817, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.06075015291571617, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0417402908205986, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.036306820809841156, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03503447026014328, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03474501147866249, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.021052883937954903, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.019127171486616135, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.018994107842445374, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01771462708711624, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.0175386480987072, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011831996031105518, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013102388940751553, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.01135329995304346, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010349255986511707, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.17007990181446075, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.14989356696605682, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.14157111942768097, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.12362828105688095, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.0781470537185669, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.07008060812950134, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.09530726075172424, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.08492332696914673, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.080315962433815, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.06531884521245956, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.06149745360016823, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.048132240772247314, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.04121048375964165, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.0382494293153286, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.03752138093113899, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.024589156731963158, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.021041901782155037, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.02074293978512287, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.01869780384004116, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.018240299075841904, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01408932264894247, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01479358971118927, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.013116066344082355, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011606404557824135, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.10286528617143631, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09318521618843079, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08442991226911545, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.07604271173477173, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.04728877544403076, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.040793340653181076, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.0646870881319046, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05858222767710686, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.048975907266139984, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.041841696947813034, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.04078228399157524, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.0330142043530941, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.028079945594072342, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.022932756692171097, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.021572284400463104, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.016550248488783836, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.012193838134407997, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.01148836687207222, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.0111371586099267, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.010283983312547207, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008716823533177376, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008655390702188015, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.006802101619541645, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005877818912267685, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.09174560010433197, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.0825527161359787, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07172185182571411, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.06474436819553375, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.04167669266462326, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.03433135151863098, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06228627264499664, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05558128282427788, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.043425023555755615, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.037004053592681885, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.036603011190891266, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03155605122447014, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.026471897959709167, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.02029496431350708, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.018534261733293533, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01581069640815258, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.010805556550621986, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.009915297850966454, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009903578087687492, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.008835950866341591, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.00829244963824749, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008001583628356457, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.0059729511849582195, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005318560171872377, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.18115234375, + "total_bits": 9148416.0, + "err": 0.2292117029428482, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.2009010910987854, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1854064166545868, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.73583984375, + "total_bits": 11474944.0, + "err": 0.16630704700946808, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.22802734375, + "total_bits": 13539328.0, + "err": 0.10398866981267929, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.73583984375, + "total_bits": 15669248.0, + "err": 0.09034343808889389, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1358235776424408, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.12171792984008789, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.18115234375, + "total_bits": 13342720.0, + "err": 0.10907156020402908, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08950412273406982, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.672607421875, + "total_bits": 15404032.0, + "err": 0.08654343336820602, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06982249766588211, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.0582081601023674, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.22802734375, + "total_bits": 17733632.0, + "err": 0.05002761632204056, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.32958984375, + "total_bits": 18159616.0, + "err": 0.04790128022432327, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03505036234855652, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.22802734375, + "total_bits": 21927936.0, + "err": 0.025840768590569496, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.34521484375, + "total_bits": 22419456.0, + "err": 0.024829400703310966, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.022921143099665642, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.73583984375, + "total_bits": 24057856.0, + "err": 0.02155398577451706, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.018015488982200623, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.016979821026325226, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.2816162109375, + "total_bits": 26347008.0, + "err": 0.01435985416173935, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010665581561625004, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1800537109375, + "total_bits": 36575232.0, + "err": 0.10373758524656296, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09148409217596054, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08770589530467987, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7347412109375, + "total_bits": 45881344.0, + "err": 0.07279331237077713, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2269287109375, + "total_bits": 54138880.0, + "err": 0.04685685783624649, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7347412109375, + "total_bits": 62658560.0, + "err": 0.04213209077715874, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.05644654855132103, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.04940211400389671, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1800537109375, + "total_bits": 53352448.0, + "err": 0.04815205559134483, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.03844540938735008, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.67205810546875, + "total_bits": 61606912.0, + "err": 0.03512616083025932, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.02833298221230507, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.025424472987651825, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.2269287109375, + "total_bits": 70916096.0, + "err": 0.02438650280237198, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.3284912109375, + "total_bits": 72620032.0, + "err": 0.024137938395142555, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.014999414794147015, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.2269287109375, + "total_bits": 87693312.0, + "err": 0.015262829139828682, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.3441162109375, + "total_bits": 89659392.0, + "err": 0.01517049316316843, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.0139964884147048, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.7347412109375, + "total_bits": 96212992.0, + "err": 0.01384108979254961, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009389455430209637, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.012347601354122162, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281341552734375, + "total_bits": 105383424.0, + "err": 0.009020663797855377, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011142071336507797, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.14152131974697113, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.132565438747406, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.12936417758464813, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.11671842634677887, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.06686107814311981, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.06331279128789902, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07538492232561111, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.06944017857313156, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.06783707439899445, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0597323402762413, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.05653066188097, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.038482993841171265, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03329363837838173, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.03212679177522659, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.03184983879327774, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.01925075240433216, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.016633324325084686, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.016496941447257996, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.015192207880318165, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.015018255449831486, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.010140685364603996, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.010249095968902111, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.00966884195804596, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.006949395872652531, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1797921316964284, + "total_bits": 127997951.99999999, + "err": 0.09199002385139465, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.08594977110624313, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.08379561454057693, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7344796316964284, + "total_bits": 160569344.0, + "err": 0.07555834949016571, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2266671316964284, + "total_bits": 189470720.0, + "err": 0.04339141398668289, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7344796316964284, + "total_bits": 219289600.0, + "err": 0.04105411469936371, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.049009546637535095, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.04509451612830162, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1797921316964284, + "total_bits": 186718208.0, + "err": 0.04407262057065964, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.03871450573205948, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6719273158482144, + "total_bits": 215616512.0, + "err": 0.03668099269270897, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.02510147914290428, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.021709326654672623, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.226667131696429, + "total_bits": 248190976.0, + "err": 0.020940959453582764, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.328229631696429, + "total_bits": 254154752.0, + "err": 0.020758388563990593, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.012605070136487484, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.226667131696429, + "total_bits": 306911232.0, + "err": 0.0110295619815588, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.343854631696429, + "total_bits": 313792512.0, + "err": 0.010943029075860977, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.010112276300787926, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.734479631696429, + "total_bits": 336730112.0, + "err": 0.010002316907048225, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.006834006868302822, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.00706438347697258, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.281276157924107, + "total_bits": 368838144.0, + "err": 0.006540374830365181, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.005113176070153713, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b_32g/0.95:2b_32g s4", + "bpw": 2.1767054966517856, + "total_bits": 127816704.0, + "err": 0.10704035311937332, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b_32g/0.75:2b_32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.09573950618505478, + "qparams": { + "group_size": { + "3": 32, + "2": 32 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b_32g/0.75:2b_32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.09030169993638992, + "qparams": { + "group_size": { + "4": 32, + "2": 32 + }, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", + "bpw": 2.7280447823660716, + "total_bits": 160191488.0, + "err": 0.0787477046251297, + "qparams": { + "group_size": { + "4": 32, + "3": 32, + "2": 32 + }, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b_32g/0.9:3b_32g s4", + "bpw": 3.2258126395089284, + "total_bits": 189420544.0, + "err": 0.04958648607134819, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b_32g/0.8:3b_32g s4", + "bpw": 3.7280447823660716, + "total_bits": 218911744.0, + "err": 0.0447072759270668, + "qparams": { + "group_size": { + "6": 32, + "3": 32 + }, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.06175757944583893, + "qparams": { + "group_size": { + "3": 128 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b_32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.054887935519218445, + "qparams": { + "group_size": { + "3": 32 + }, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b_32g/0.95:3b_32g s4", + "bpw": 3.1767054966517856, + "total_bits": 186536960.0, + "err": 0.05094762519001961, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b_32g/0.6:3b_32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.042055875062942505, + "qparams": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b_64g/0.4:3b_64g s4", + "bpw": 3.6653616768973216, + "total_bits": 215230976.0, + "err": 0.03987801820039749, + "qparams": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.03181779757142067, + "qparams": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b_32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.026863282546401024, + "qparams": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b_32g/0.9:4b_32g s4", + "bpw": 4.225812639508929, + "total_bits": 248140800.0, + "err": 0.024511056020855904, + "qparams": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:4b_32g s4", + "bpw": 4.326259068080357, + "total_bits": 254039039.99999997, + "err": 0.023902757093310356, + "qparams": { + "group_size": { + "6": 32, + "4": 32 + }, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b_128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.01647721230983734, + "qparams": { + "group_size": { + "5": 128 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b_32g/0.9:5b_32g s4", + "bpw": 5.225812639508929, + "total_bits": 306861056.0, + "err": 0.013803635723888874, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", + "bpw": 5.330723353794643, + "total_bits": 313021440.0, + "err": 0.013558349572122097, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b_32g/0.6:5b_32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.012474635615944862, + "qparams": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", + "bpw": 5.728044782366071, + "total_bits": 336352256.0, + "err": 0.012123570777475834, + "qparams": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.009744579903781414, + "qparams": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b_32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.010115188546478748, + "qparams": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b_128g/0.9:6b_128g s4", + "bpw": 6.245627267020089, + "total_bits": 366744832.0, + "err": 0.008965478278696537, + "qparams": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b_32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.008202197030186653, + "qparams": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + } + ], + "last_module_idx": 66, + "base_perplexity": 6.4102726653449995 +} \ No newline at end of file