diff --git "a/measurement.json" "b/measurement.json" new file mode 100644--- /dev/null +++ "b/measurement.json" @@ -0,0 +1,119846 @@ +{ + "measurement": [ + { + "key": "model.layers.0.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.01611865684390068, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.015945566818118095, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.008271468803286552, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.008224260061979294, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.008175279013812542, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.005121907219290733, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.02400517277419567, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.015788685530424118, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.008194300346076488, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.008149000816047192, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.00840780045837164, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.009443790651857853, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.008135282434523106, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.005797794088721275, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0050962925888597965, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.005828194320201874, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.005087511148303747, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.004846198484301567, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.005085951182991266, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.004844465292990208, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.004989502485841513, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.005085372366011143, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.004694769624620676, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0048430608585476875, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.018150659278035164, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.01788152940571308, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.008610905148088932, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.00853341817855835, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.008454401046037674, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.004084131680428982, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.022038668394088745, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.017671285197138786, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.00849489588290453, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.00840325653553009, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.008705811575055122, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.009263250045478344, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.008381054736673832, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.005150947254151106, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.004022156819701195, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.00514684384688735, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.003996668849140406, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.003577030496671796, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.003992010373622179, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.003570862812921405, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.003746309783309698, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.003990586847066879, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0032043533865362406, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0035667966585606337, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.06581752002239227, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.054839491844177246, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.044724345207214355, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0377509742975235, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.029422134160995483, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.02183348499238491, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.051490623503923416, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.040736693888902664, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.031253427267074585, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.023974545300006866, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0241555105894804, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02737727202475071, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.01984996534883976, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.014609022997319698, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.013072021305561066, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013832101598381996, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.008231368847191334, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.007537313736975193, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.007276984862983227, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.00634804368019104, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007169797085225582, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0066187577322125435, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.004428199492394924, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004894275218248367, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.020956797525286674, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.012053387239575386, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.006709914188832045, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.00845959410071373, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.00816323235630989, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0031518053729087114, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0129030030220747, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.011565589345991611, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0098660783842206, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.00581701984629035, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.006210155785083771, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.006581040099263191, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.005705139599740505, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.004284969065338373, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0038922426756471395, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0034266235306859016, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.002771056490018964, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0026725090574473143, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.002450241707265377, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0022361038718372583, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.002083573956042528, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.002437480492517352, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0016884319484233856, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.002100570360198617, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.0711153894662857, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.06638137996196747, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.06484030187129974, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.058712225407361984, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.03181963041424751, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.030362101271748543, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.035531919449567795, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.0328960046172142, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.03228330612182617, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.02887393906712532, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.027428239583969116, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.018039971590042114, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.015858536586165428, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.015406458638608456, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.01529866736382246, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.009099426679313183, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.008449142798781395, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.008414654061198235, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.00792557280510664, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.007860897108912468, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.005121325142681599, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.0058247679844498634, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.004968233872205019, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.004655458498746157, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.10628111660480499, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.09927809983491898, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.09712346643209457, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.08795296400785446, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.04752698913216591, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.045401688665151596, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.052987199276685715, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.0489688366651535, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.0481833778321743, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.043090175837278366, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.040961459279060364, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.02672564424574375, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.02324601821601391, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.02262124978005886, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.022476056590676308, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.013345947489142418, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.011787100695073605, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.011740759946405888, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.0109331626445055, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.010849835351109505, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.0071328748017549515, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.007359788287431002, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.006914212368428707, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.005172839388251305, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.0.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.05888640880584717, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.049149785190820694, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.03318430110812187, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.030229218304157257, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.02485620602965355, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.01633765920996666, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.04808812588453293, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.04162061586976051, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.026674626395106316, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.021476106718182564, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.021444806829094887, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.021028362214565277, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.019317038357257843, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.01337199006229639, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.011716771870851517, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.012385855428874493, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.0092026237398386, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.008128250017762184, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.008766809478402138, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.007505335379391909, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.008482229895889759, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.008509406819939613, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.0068030813708901405, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.006940066814422607, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.016099177300930023, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.012771064415574074, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.00982070341706276, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.008610192686319351, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.006908757612109184, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.004648404195904732, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.01253475435078144, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.009959480725228786, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.007503448985517025, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.005651453509926796, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.006034377031028271, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.006647436413913965, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.004904598463326693, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0034498930908739567, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.002985029947012663, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0034245329443365335, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0019522473448887467, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0017420697258785367, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0017380638746544719, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.001455760095268488, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0018269091378897429, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0016206048894673586, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0011346053797751665, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0011699693277478218, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.014827030710875988, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.011538898572325706, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.008905249647796154, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.00780138187110424, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.006222925148904324, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0042534261010587215, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.010092516429722309, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.008905651979148388, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0068292925134301186, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.004990290384739637, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.005084823817014694, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.00513160414993763, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.004276178311556578, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0030802455730736256, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0027260964270681143, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0025810063816607, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0017351736314594746, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.00158730149269104, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0015216452302411199, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.001306044403463602, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.001392825273796916, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0014086903538554907, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0009742011316120625, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00103134720120579, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.08829569816589355, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.07431085407733917, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.06826536357402802, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.05851709097623825, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.03951870650053024, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03365742042660713, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.04913372918963432, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04514669254422188, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04153493419289589, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.031842075288295746, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.02976324036717415, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.025124546140432358, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02167450450360775, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.01909777522087097, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.018453076481819153, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.012590257450938225, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01009555347263813, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.00985889695584774, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.008616703562438488, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.008183561265468597, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00666216854006052, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006816364824771881, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.005775123834609985, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004750227555632591, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1480581909418106, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09344519674777985, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.06761407107114792, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.06524050980806351, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06397543102502823, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03643788397312164, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0873354896903038, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.07791420817375183, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.07045529782772064, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.039873622357845306, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0419822633266449, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.04516734182834625, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.038473352789878845, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.032256659120321274, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03062393143773079, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.023149894550442696, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.018945973366498947, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.018503796309232712, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01517239399254322, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01412817183881998, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.013505066744983196, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.014988968148827553, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.011654207482933998, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012402052991092205, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.10716817528009415, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.10133105516433716, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.09957893192768097, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.0906321331858635, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.04759899899363518, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.04595794901251793, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.05263233184814453, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.04860535264015198, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.04808470979332924, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.04376998543739319, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.041828613728284836, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.026670396327972412, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.02321656420826912, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.022797666490077972, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.02269657701253891, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.013383346609771252, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.012221980839967728, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.012195488438010216, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.011525616981089115, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.011470801196992397, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.007435097824782133, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.008070355281233788, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.007296656724065542, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.006206778809428215, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.14285360276699066, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.13549505174160004, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.13331705331802368, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.12181022763252258, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.06370848417282104, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.06163949891924858, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.07018011063337326, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.06494401395320892, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.06432177126407623, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.05876350775361061, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.05608568340539932, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.03524104878306389, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.030640752986073494, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03011752851307392, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.029994888231158257, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.017541302368044853, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.015331551432609558, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.015295935794711113, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.014354554004967213, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.014276525937020779, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.009115290828049183, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.009002509526908398, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.0089333551004529, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.005732610821723938, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.1.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.12631794810295105, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.11401350051164627, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.10303351283073425, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.0938413217663765, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.057894740253686905, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.04760144278407097, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.07969684153795242, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.07078348845243454, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.05960455909371376, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.052332401275634766, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.05064116418361664, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.040953077375888824, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.03700299561023712, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.027228474617004395, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.02522406354546547, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.019565550610423088, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.014790123328566551, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.0138318482786417, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.013781671412289143, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.012575407512485981, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.011357787996530533, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.011052543297410011, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.009359482675790787, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.007841968908905983, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.034410640597343445, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.03143605589866638, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.030099518597126007, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.026735415682196617, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.015585148707032204, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.01436425931751728, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.018930044025182724, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0168845746666193, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.015930037945508957, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.013693798333406448, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.013001891784369946, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.009682061150670052, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.00810900516808033, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.007489445153623819, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0073303538374602795, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.004897081758826971, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0039282431825995445, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.00385926547460258, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0035632571671158075, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.003462929744273424, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0026185684837400913, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0025575601030141115, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0023247685749083757, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0017810791032388806, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.0272363368421793, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.024766257032752037, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.023722399026155472, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.021037466824054718, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.012273409403860569, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.011329285800457, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.014532300643622875, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.013269525952637196, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.012581842951476574, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.010743383318185806, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.01011596154421568, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.007332863751798868, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.006321488413959742, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.005881930235773325, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.005772294010967016, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0036759101785719395, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.003067431040108204, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0030268123373389244, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0027688986156135798, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.002700401935726404, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0019517461769282818, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.001961600035429001, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0017940005054697394, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0013471184065565467, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13785097002983093, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.127851203083992, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1246202141046524, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1119353324174881, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06338807940483093, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05993565171957016, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07110445201396942, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06570425629615784, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06447656452655792, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.056444860994815826, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.053149424493312836, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0360264852643013, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03128056600689888, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.030228974297642708, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.029987093061208725, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017968792468309402, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01540353149175644, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015328390523791313, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013981624506413937, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013821546919643879, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009271493181586266, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009127791039645672, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008891521953046322, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005727093666791916, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.17027688026428223, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.13836537301540375, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12687785923480988, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10005426406860352, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.07631829380989075, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06459616124629974, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.09376075863838196, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.08506322652101517, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.08066685497760773, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05658874660730362, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05158667266368866, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.04850300773978233, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04162978380918503, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03780826926231384, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.036871664226055145, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.024973295629024506, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.021415583789348602, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.021176796406507492, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01804308220744133, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01746039092540741, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01473592221736908, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015835406258702278, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013640844263136387, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012937705032527447, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.16649030148983002, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.15782621502876282, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.155284121632576, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1422024965286255, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.076182059943676, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0735483393073082, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08413589000701904, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.07760901749134064, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.07694733887910843, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0700145810842514, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06727100163698196, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.042835161089897156, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.03706156089901924, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.036440350115299225, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03628166764974594, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.0214708112180233, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.01908060722053051, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.01903967186808586, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.017899420112371445, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01781124249100685, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.011699576862156391, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01198900118470192, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.011499284766614437, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008618896827101707, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.20090535283088684, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1906849592924118, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.18768222630023956, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17208231985569, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09208628535270691, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08895103633403778, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10173869878053665, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09381573647260666, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09304047375917435, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08471135795116425, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08128818869590759, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05145090073347092, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.044562023133039474, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.043805159628391266, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.043637994676828384, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.025702916085720062, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.022370504215359688, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.022327907383441925, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02089657448232174, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.020787479355931282, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013556701131165028, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013218013569712639, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013311927206814289, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008536302484571934, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.2.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.19379547238349915, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.17706182599067688, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.17083972692489624, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.15563449263572693, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.08718342334032059, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.08122988045215607, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.10086078941822052, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.09277018904685974, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.08957982808351517, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.07813718169927597, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.07502859830856323, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.05117589235305786, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.04424094781279564, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.04176231846213341, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.04115893691778183, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.025678440928459167, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.021851949393749237, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.021723946556448936, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.020109405741095543, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.019732369109988213, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.013911084271967411, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.013977307826280594, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.013131072744727135, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.009800972416996956, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.04825674369931221, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.04405873641371727, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.042337555438280106, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.03810757398605347, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.022207897156476974, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.02033567801117897, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.02638898231089115, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.023572443053126335, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.02261519804596901, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.019421398639678955, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.01850179210305214, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.013545329682528973, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.01133540365844965, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.010736054740846157, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.010593023151159286, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.006852925289422274, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.005609966814517975, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.005551437381654978, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.005012304987758398, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.004921289626508951, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.003660147776827216, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0035400628112256527, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0034719835966825485, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0024423659779131413, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.03798632323741913, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.03495771810412407, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.03357279300689697, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.03002476692199707, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.017376434057950974, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.016131218522787094, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.02052186243236065, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.018703320994973183, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.01770785264670849, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.015347383916378021, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.014600168913602829, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.010390500538051128, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.008929986506700516, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.008348856121301651, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.008209053426980972, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0052103567868471146, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.00437560398131609, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.00431783078238368, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.003980761859565973, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.0038927162531763315, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.002776486799120903, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.002818624023348093, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.002579114632681012, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0019767871126532555, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.17761123180389404, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1654166728258133, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1615065038204193, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14558625221252441, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.08228936046361923, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07805894315242767, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.09271856397390366, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.08506479859352112, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.08355356752872467, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07354266941547394, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0696493536233902, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.04700621962547302, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.040559299290180206, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03928583115339279, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.038979414850473404, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.023489350453019142, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02001827210187912, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.019924869760870934, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01822138950228691, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01803647167980671, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01217436883598566, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.011831553652882576, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.011694586835801601, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.007425446063280106, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12695784866809845, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1097988560795784, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09941259771585464, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08219702541828156, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05782853066921234, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.049326468259096146, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.08207778632640839, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06940598785877228, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06003695726394653, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04765299707651138, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04388287290930748, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.042115215212106705, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.034859541803598404, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.029695725068449974, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.028290873393416405, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02253514714539051, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.018237164244055748, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.017675280570983887, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.016653545200824738, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.015899786725640297, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.013908573426306248, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.014932814054191113, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.012114757671952248, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012869827449321747, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17996956408023834, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.17019398510456085, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1672438234090805, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15279291570186615, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0839633047580719, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08076925575733185, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09281770884990692, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08581268787384033, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08492191135883331, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07668060809373856, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0733950287103653, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.047561079263687134, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0414208360016346, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04063794016838074, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.040437377989292145, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02385886199772358, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021804096177220345, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021746978163719177, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02043103240430355, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02031479962170124, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01317545771598816, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014390292577445507, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01292810682207346, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01104068011045456, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2224755734205246, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21043911576271057, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2069682776927948, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.18913519382476807, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10354425758123398, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09961490333080292, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11448032408952713, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10574395954608917, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.1046956479549408, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0945504680275917, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09041779488325119, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05822107195854187, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.050496071577072144, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.049537282437086105, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04930897429585457, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.029105551540851593, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.0256098210811615, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.025555899366736412, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.023816289380192757, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.023678643628954887, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015485898591578007, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015602550469338894, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015173396095633507, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010653667151927948, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.3.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.0762411579489708, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.07193819433450699, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.045083094388246536, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.041544508188962936, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.03275832161307335, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.018920766189694405, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.06686056405305862, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.06287572532892227, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.033374082297086716, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.03060300648212433, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.03030942752957344, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.028730982914566994, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.02718586102128029, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.01027278695255518, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.01091720350086689, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.006925850175321102, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.007252016570419073, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.005606325343251228, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.0068143983371555805, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.004957662429660559, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.005986183416098356, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.006079324521124363, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.0036265759263187647, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.003255607560276985, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.06208787485957146, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.05685688555240631, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.05476383492350578, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.048893824219703674, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.028383532539010048, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.026383649557828903, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.03285443037748337, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.030281832441687584, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.028983240947127342, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.024977555498480797, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.023575983941555023, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.016645316034555435, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.01444238144904375, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.013601468876004219, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.01339939795434475, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.008320181630551815, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.007041222415864468, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0069605628959834576, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.006365051493048668, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.006233029067516327, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.004353508353233337, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0044028437696397305, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.004068559501320124, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0029559023678302765, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.05413151904940605, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.04956419765949249, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.04763616621494293, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.04251779615879059, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.024735024198889732, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.022959323599934578, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.02889774553477764, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.026512790471315384, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.025284642353653908, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.021767649799585342, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.020612573251128197, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.014636675827205181, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.012637822888791561, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.011853453703224659, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.01166237611323595, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.007316018920391798, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.006124828942120075, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.006049131043255329, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.005532471928745508, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.005411652848124504, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.003822671715170145, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0038249404169619083, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.003557938849553466, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0025413217954337597, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.18376865983009338, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1707392930984497, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16654819250106812, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14992539584636688, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.08537092059850693, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08074228465557098, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.09592984616756439, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.08849479258060455, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.08680708706378937, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0760519802570343, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07176630198955536, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.048679426312446594, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.042194563895463943, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04077290743589401, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04043689742684364, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.024267276749014854, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.020760947838425636, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.020653190091252327, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.018843496218323708, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.018625319004058838, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0125154173001647, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.012278134003281593, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.011996755376458168, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.007666983176022768, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1902093142271042, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.16533781588077545, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.15498919785022736, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.13405361771583557, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.08831623196601868, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07735628634691238, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1068587377667427, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09718610346317291, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09147145599126816, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07244189083576202, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0669308602809906, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.055422864854335785, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04756183177232742, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04348289966583252, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04249528795480728, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.028152676299214363, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02413533814251423, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02377842739224434, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02126835659146309, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.020647509023547173, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016048379242420197, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01730944588780403, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014802560210227966, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013738615438342094, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1884520798921585, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.17774906754493713, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.17456823587417603, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15915486216545105, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08791736513376236, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08430810272693634, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.0970066636800766, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.0898556038737297, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.0889790803194046, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07983828336000443, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07599702477455139, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.049280937761068344, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04290048032999039, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04202685132622719, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0418209508061409, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.024607377126812935, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021641818806529045, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02158418297767639, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.020024534314870834, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01988956891000271, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012912615202367306, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013062824495136738, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012623060494661331, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008739781565964222, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23311370611190796, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2199610024690628, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2160913050174713, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1971069574356079, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10873813182115555, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10436506569385529, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11993976682424545, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11111648380756378, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11006149649620056, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09878244251012802, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09401051700115204, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.060811612755060196, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05294094979763031, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.051871247589588165, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05162498727440834, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.030327828601002693, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026337597519159317, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.0262797512114048, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02429351583123207, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.0241340734064579, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.015646591782569885, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015341853722929955, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015287660993635654, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009517781436443329, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.4.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.22870180010795593, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2069704383611679, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.19911830127239227, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.17821891605854034, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10460393130779266, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09661839157342911, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12067336589097977, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11121897399425507, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.10757279396057129, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09156592935323715, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08639363944530487, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.061372559517621994, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05315445363521576, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05019419267773628, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.04947007820010185, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.030743392184376717, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.0261201374232769, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.025996694341301918, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.023559192195534706, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02310927025973797, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.016467954963445663, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.016504378989338875, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01550917886197567, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.011345556005835533, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.0707162544131279, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.06478503346443176, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.062224164605140686, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.055524859577417374, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0324147492647171, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.030053680762648582, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.03762605041265488, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.034782711416482925, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.03310343623161316, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.028474275022745132, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.026836972683668137, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.01907014101743698, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.016620948910713196, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.01556678581982851, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.015314251184463501, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.009545831941068172, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.008118104189634323, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.008013526909053326, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.00734133692458272, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.007179728243499994, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.005028133746236563, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.005173767451196909, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0046792286448180676, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.003562202211469412, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.06035004183650017, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.05525514855980873, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.05296366661787033, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.04726802185177803, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.027675824239850044, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.02560550346970558, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.03253233805298805, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.029895195737481117, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.02827729471027851, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.02432638593018055, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.02304348163306713, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.016491305083036423, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.014271325431764126, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.013288033194839954, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.013051044195890427, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.008252527564764023, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.006902501918375492, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0068017891608178616, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0062371548265218735, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.006084728986024857, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0043326434679329395, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.004375786054879427, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.004005491267889738, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0029588881880044937, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.19349981844425201, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17955812811851501, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1750190109014511, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15740323066711426, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09003337472677231, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08501927554607391, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10140800476074219, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0934782326221466, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09157891571521759, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08001960068941116, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07548366487026215, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05149412900209427, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04459955543279648, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.043040696531534195, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04266628250479698, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.025702811777591705, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02192617580294609, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.021808061748743057, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01986566185951233, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.019624697044491768, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.013279924169182777, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.013001583516597748, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.012700705789029598, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.008144048973917961, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1934305727481842, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1684076488018036, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.15932483971118927, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.13263314962387085, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0900251567363739, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08016563206911087, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10547970235347748, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09654466062784195, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0927124172449112, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07255184650421143, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0644996240735054, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.054714102298021317, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.047058526426553726, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.044112179428339005, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04340628907084465, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02771417610347271, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.024237964302301407, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.024008141830563545, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02106194943189621, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.020600564777851105, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01566464453935623, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016960542649030685, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014749827794730663, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013360030017793179, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.18531681597232819, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.17433053255081177, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.17104749381542206, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15553000569343567, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0867839902639389, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08301714807748795, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09597687423229218, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08889289200305939, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08791519701480865, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07844877243041992, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07456165552139282, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04891261085867882, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04256386309862137, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04161370173096657, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.041392333805561066, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.024426260963082314, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021597282961010933, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021535471081733704, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019947567954659462, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01981271058320999, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012941005639731884, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013318353332579136, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012627487070858479, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009222542867064476, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2428189069032669, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2286248356103897, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2243485450744629, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20412863790988922, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11363665759563446, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10877194255590439, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1256430596113205, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11633819341659546, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11512943357229233, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10279230773448944, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09764792770147324, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06374488025903702, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05548331141471863, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.054261673241853714, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.053975995630025864, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.031816545873880386, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02756519615650177, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.027484681457281113, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02532440796494484, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025134121999144554, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016428321599960327, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01606442779302597, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016014834865927696, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009942576289176941, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.5.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2344614714384079, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21216857433319092, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20417186617851257, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18216663599014282, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10757597535848618, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09933672100305557, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1239297091960907, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11437249183654785, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.1105801910161972, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09398758411407471, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08839630335569382, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06316585093736649, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.054796140640974045, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.051728978753089905, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.0509883388876915, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03170798718929291, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027078494429588318, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.026931673288345337, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02441401407122612, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02394530363380909, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017087440937757492, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01729574427008629, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016124270856380463, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012115486897528172, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.0881931409239769, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.0816459134221077, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.0790502279996872, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0709744542837143, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0408351868391037, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03831074386835098, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.046685222536325455, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04316297918558121, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04157526418566704, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03623901307582855, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.034240249544382095, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02368270978331566, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.020638981834053993, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.019620567560195923, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.019367974251508713, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.011858455836772919, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.010211123153567314, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01011423859745264, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009299948811531067, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.009145934134721756, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006246880162507296, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006428281776607037, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.005903801415115595, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004427497740834951, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.07260876148939133, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.06724019348621368, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.06497808545827866, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.05842159315943718, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.033650320023298264, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0315031036734581, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.03892895579338074, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.03575940430164337, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.034255050122737885, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.029897494241595268, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.02840558812022209, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.019760694354772568, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.017086034640669823, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.016136890277266502, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.015913615003228188, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.009886995889246464, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.008347449824213982, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.008250872604548931, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.007594319526106119, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.007448969874531031, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0051742782816290855, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.005192325916141272, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0048569864593446255, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0034664920531213284, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2115105241537094, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19709926843643188, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19244062900543213, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.17366117238998413, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0987883135676384, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09361337125301361, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11081766337156296, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10221166163682938, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10035857558250427, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08814144879579544, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0832577794790268, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05624764785170555, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04874337092041969, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.047187693417072296, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04682428017258644, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.028040839359164238, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02399587072432041, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02388082630932331, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.021798601374030113, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02156442031264305, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014440775848925114, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.014112366363406181, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01386493444442749, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.008728425949811935, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1895902156829834, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.16142122447490692, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1498430222272873, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12509241700172424, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.08784978091716766, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07554814964532852, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10661257058382034, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09728986024856567, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09092443436384201, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06922565400600433, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06198754906654358, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05491413548588753, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04727240651845932, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04301261529326439, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04196232184767723, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02766026183962822, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.023591842502355576, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.023194007575511932, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.020206917077302933, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.019531000405550003, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015470286831259727, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016667339950799942, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014162836596369743, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.012934801168739796, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.18239107728004456, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1714686155319214, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16809284687042236, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15275536477565765, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08552352339029312, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08169784396886826, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09467660635709763, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08774608373641968, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08668137341737747, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07714463770389557, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07319605350494385, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0481499582529068, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04194457828998566, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04094325006008148, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0407068207859993, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02405734360218048, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021050777286291122, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020984016358852386, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01935870200395584, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019209163263440132, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012588069774210453, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012682845816016197, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012258986011147499, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00841000210493803, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24615801870822906, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23159217834472656, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2271643877029419, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2065630704164505, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.1154494658112526, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11033273488283157, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12775254249572754, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11832040548324585, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.1169736459851265, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10420212894678116, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09895243495702744, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0648910328745842, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.056448981165885925, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05514810234308243, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05483954772353172, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03239072486758232, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02803313545882702, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.027946900576353073, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.025712285190820694, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025510787963867188, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016764149069786072, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01638418436050415, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016316229477524757, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010176082141697407, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.6.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.23802553117275238, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21459969878196716, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2059517800807953, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18380111455917358, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10911194980144501, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10032320767641068, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1265285313129425, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11665118485689163, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11240030080080032, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09503743797540665, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08944794535636902, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06441718339920044, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.055899910628795624, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.052510205656290054, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05168934166431427, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03230878710746765, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027545755729079247, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.027376046404242516, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.024801000952720642, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02427959069609642, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017364080995321274, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017725631594657898, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016291063278913498, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012478645890951157, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.08694429695606232, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08061634004116058, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.07804504036903381, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07011488080024719, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.040249429643154144, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03777715936303139, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.04606480523943901, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.042549978941679, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04095281660556793, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03577359765768051, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03380792960524559, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02336070127785206, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.020323993638157845, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.019300878047943115, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.019053997471928596, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01168846245855093, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.009971232153475285, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.009873311035335064, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009072738699615002, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.008917800150811672, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006116487085819244, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006182531826198101, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.005765371955931187, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004129860084503889, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.07321491837501526, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.06793384999036789, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.0656319186091423, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.05900970846414566, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.03393332660198212, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03177622705698013, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.03919614851474762, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.03615464270114899, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.03452626243233681, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03018942102789879, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.028627220541238785, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0198759026825428, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.017267147079110146, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.016267994418740273, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.016028007492423058, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.00993913970887661, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.00840552058070898, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.008305927738547325, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.007654429879039526, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.007500441279262304, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.005189491901546717, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.005228283815085888, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.004857844207435846, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.003471749136224389, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20601512491703033, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1921076476573944, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.18758170306682587, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.16911445558071136, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09626144170761108, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09118745476007462, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10830395668745041, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09960988909006119, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09775204211473465, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08591707050800323, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08117890357971191, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05504831671714783, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.047530025243759155, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04599853232502937, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04563255235552788, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02745506539940834, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.023400042206048965, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02327803149819374, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02126210927963257, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021023783832788467, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014138638973236084, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.013783138245344162, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013530503958463669, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.008548851124942303, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20482389628887177, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17763690650463104, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16774365305900574, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14148356020450592, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09419309347867966, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.083689846098423, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11254828423261642, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10271518677473068, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09810531884431839, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0771123543381691, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06963039189577103, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05802037939429283, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.050255097448825836, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.046431127935647964, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04547923058271408, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0294638779014349, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.025861473754048347, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02560444176197052, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.022810520604252815, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.022240223363041878, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016838442534208298, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018589375540614128, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015714481472969055, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014938940294086933, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.16949185729026794, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.15918999910354614, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15603092312812805, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14174479246139526, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07956197112798691, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07594143599271774, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08822017908096313, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08172766119241714, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08064162731170654, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07173226773738861, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06805732846260071, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.044903792440891266, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.039124030619859695, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03813178092241287, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03790358453989029, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02243906818330288, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.019698549062013626, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.019632678478956223, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01812390796840191, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.0179740097373724, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.011800994165241718, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01200881041586399, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.011476606130599976, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008122381754219532, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23949865996837616, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22520284354686737, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22081492841243744, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2007121443748474, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11260215193033218, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10758662968873978, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12485554069280624, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11554313451051712, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11413928121328354, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10159560292959213, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09645744413137436, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06357238441705704, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.055275388062000275, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.053960803896188736, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05365251004695892, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03174588084220886, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027803756296634674, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.027723997831344604, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.025576584041118622, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02538258582353592, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01666109636425972, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016844144091010094, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016220053657889366, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011278588324785233, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.7.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.22712105512619019, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.20427152514457703, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.1959480345249176, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.17437240481376648, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10409200191497803, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09557418525218964, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1216612309217453, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11130920052528381, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.1072792112827301, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09034602344036102, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.0849776342511177, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06191244348883629, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05356238782405853, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05029713362455368, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.04951009154319763, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03131861984729767, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02676166221499443, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02659669890999794, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02413320541381836, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02363874763250351, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017320193350315094, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017686454579234123, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01630762778222561, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012976765632629395, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09643928706645966, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08964703232049942, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08689368516206741, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07812859117984772, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.044713400304317474, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04203306511044502, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.051055390387773514, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.047170646488666534, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.045454997569322586, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.039788100868463516, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03765909746289253, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.025894656777381897, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.022520579397678375, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.021432559937238693, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.021172955632209778, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01295254472643137, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011083022691309452, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.010978830978274345, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010102234780788422, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.009935736656188965, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0067921290174126625, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006872258614748716, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006424060091376305, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004608509596437216, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.07873030751943588, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.0731821283698082, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.07074650377035141, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0636436939239502, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.03650189936161041, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.034234508872032166, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.042018238455057144, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.03881741687655449, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.03710377961397171, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.032535649836063385, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.030841205269098282, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02130010910332203, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.018535999581217766, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.01750093698501587, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.017254674807190895, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.010654992423951626, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.009043678641319275, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.008939307183027267, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.008245707489550114, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.008089909330010414, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.005580764263868332, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00561960693448782, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.005247713066637516, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0037385050673037767, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2164556086063385, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.202035591006279, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19736629724502563, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.17807497084140778, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10111770778894424, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09590213745832443, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11390998214483261, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1046363040804863, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10268883407115936, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09035035222768784, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08560702949762344, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.057880792766809464, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.049925561994314194, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.048319824039936066, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.047930758446455, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.028894565999507904, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02456762082874775, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.024440934881567955, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.022333458065986633, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.022092383354902267, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01487088855355978, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.014442261308431625, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014215093106031418, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.008923335000872612, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.19703085720539093, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17464503645896912, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16700050234794617, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14190763235092163, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09171890467405319, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0832769051194191, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10635398328304291, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09769655764102936, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09443636983633041, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0762912705540657, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06843933463096619, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05506506562232971, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0477423220872879, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04505899176001549, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.044416625052690506, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02797522395849228, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.024834081530570984, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.024655573070049286, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.022014297544956207, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021615203469991684, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01607065461575985, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017422134056687355, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015292664058506489, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013868064619600773, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.16918009519577026, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.15906758606433868, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15585929155349731, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14166536927223206, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07956860214471817, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07595577836036682, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.0882859155535698, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.081778883934021, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08063993602991104, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07180267572402954, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06819932907819748, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04506925493478775, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.03923247009515762, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03823791444301605, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03800928220152855, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.022536616772413254, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.01992233842611313, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.01985396444797516, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.018375171348452568, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01822730526328087, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.011972506530582905, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.012392360717058182, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.011645110324025154, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008679750375449657, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23589618504047394, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22182638943195343, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2175246924161911, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19785889983177185, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11071236431598663, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10577666759490967, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12288929522037506, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11365412175655365, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11219504475593567, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.09994855523109436, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09487821906805038, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06238197535276413, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05425719544291496, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05293070152401924, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05261730030179024, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.031167255714535713, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026966750621795654, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.026875615119934082, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024747800081968307, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02454860508441925, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01617800071835518, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015856971964240074, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015719089657068253, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009971768595278263, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.8.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.23039184510707855, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2074199914932251, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.19911159574985504, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.17723405361175537, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10570863634347916, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.0970509946346283, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12314452230930328, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11284653097391129, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.10886231809854507, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09177816659212112, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08627049624919891, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06259506940841675, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.054219357669353485, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05100029334425926, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05022187530994415, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03149130195379257, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.026960523799061775, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.026811521500349045, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.024273669347167015, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.0237821564078331, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01714080013334751, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01760825701057911, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01611265353858471, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01271092239767313, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10245906561613083, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09531787037849426, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.0924951434135437, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08336551487445831, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04765027016401291, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04486546292901039, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05435498058795929, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05023607984185219, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04843687266111374, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.042534638196229935, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04031215235590935, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02768682688474655, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02411891333758831, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.022977938875555992, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.022702787071466446, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01388708595186472, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012094315141439438, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011987061239778996, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01109834760427475, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010928894393146038, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007413502782583237, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007790483999997377, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007036272902041674, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005593661218881607, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.08659277856349945, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08060294389724731, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.07807780057191849, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07040710747241974, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04025576263666153, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0378485843539238, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.046224355697631836, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.042645227164030075, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0409223698079586, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03596289083361626, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.034138020128011703, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.023474423214793205, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02038397267460823, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.01931586116552353, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.01906677335500717, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.011744488030672073, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.009987247176468372, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.009878745302557945, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009123610332608223, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.008963813073933125, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006165153346955776, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006200088188052177, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.005813614930957556, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004146374762058258, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21448737382888794, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.20045363903045654, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19576577842235565, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1768278330564499, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10030850768089294, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09517879784107208, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11303898692131042, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10379935801029205, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10181713849306107, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08978830277919769, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08509187400341034, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05744103342294693, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04955015704035759, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04794890806078911, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04757159948348999, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.028664205223321915, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.024434275925159454, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.024302473291754723, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.022263698279857635, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.022013729438185692, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014808260835707188, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.014455852098762989, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014169656671583652, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009057929739356041, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2114623785018921, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18359702825546265, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.17206692695617676, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14367501437664032, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09767962992191315, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08643613755702972, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11835823953151703, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10798244178295135, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1012004017829895, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0790390893816948, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07130887359380722, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06101158633828163, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05297825112938881, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04838116094470024, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04725383222103119, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03096696548163891, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027253180742263794, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026840312406420708, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02394889108836651, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02326127141714096, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017856415361166, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01993018575012684, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01651763543486595, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01618357188999653, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17870035767555237, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1679779291152954, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16455097496509552, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1496831774711609, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08407974988222122, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08027128875255585, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09339330345392227, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08643640577793121, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08524297177791595, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07586020231246948, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07205713540315628, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.047625865787267685, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04146120697259903, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04040157049894333, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04014421999454498, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023810485377907753, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021018076688051224, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020945103839039803, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019383706152439117, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019224490970373154, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012614740058779716, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013044698163866997, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012262774631381035, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009101313538849354, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2419963777065277, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2275552749633789, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22308725118637085, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20290857553482056, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.113644540309906, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.1085309088230133, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12618154287338257, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1166568323969841, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11518193781375885, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10258613526821136, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09734830260276794, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06410317122936249, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05570488050580025, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05434441193938255, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.054021235555410385, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.0319981724023819, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.0276789627969265, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02758280374109745, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02539074420928955, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025186913087964058, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01659112051129341, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01626412943005562, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016120417043566704, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010209770873188972, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.9.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.24263906478881836, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21829001605510712, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20926202833652496, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18582208454608917, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11152742058038712, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10229500383138657, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1297706663608551, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11937835812568665, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11492641270160675, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09657882899045944, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09072750806808472, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06630335748195648, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05743929371237755, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05388430505990982, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05303515866398811, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03333664312958717, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02862503007054329, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.028458133339881897, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.025759320706129074, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.025235693901777267, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018252886831760406, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018895398825407028, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01710149459540844, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013843809254467487, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10860021412372589, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10104437917470932, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09811814874410629, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08834648132324219, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05045333132147789, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04754362255334854, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05744953453540802, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05306787043809891, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05127272382378578, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04501121863722801, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04260103031992912, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02919752337038517, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02539926767349243, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02422589436173439, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02394586056470871, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014600086025893688, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01258553471416235, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01247082743793726, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011500026099383831, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011319541372358799, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007691929582506418, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007871215231716633, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00729775708168745, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005385325290262699, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.08825945854187012, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08213476091623306, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.0794842466711998, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07161281257867813, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04095644876360893, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03848603367805481, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0470818430185318, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04350117966532707, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.041623279452323914, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.036551471799612045, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03464339300990105, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.023867474868893623, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.020772576332092285, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.01963954046368599, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.019369548186659813, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.011942379176616669, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.010154727846384048, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.010041629895567894, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009269784204661846, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.009098226204514503, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0062620374374091625, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006313229911029339, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.005901126191020012, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004211949184536934, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21399646997451782, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19983652234077454, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19487397372722626, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1758294701576233, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09995044767856598, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09467020630836487, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1126675233244896, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10387245565652847, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10149648785591125, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08937666565179825, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08453144878149033, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.057232052087783813, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.049575697630643845, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04777776077389717, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04735521972179413, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.028554238379001617, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02436930127441883, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.024215249344706535, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.022194141522049904, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02191438525915146, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014755330979824066, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.014484417624771595, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014097589999437332, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00910554826259613, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22348521649837494, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19958120584487915, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19062642753124237, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.16082273423671722, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10418010503053665, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09474558383226395, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12205418944358826, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11190146207809448, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10720091313123703, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08745335787534714, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07779821753501892, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06305281817913055, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.054827623069286346, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05127423256635666, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05042729526758194, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03204988315701485, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02852148935198784, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028236467391252518, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025488857179880142, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024963580071926117, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018400223925709724, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.020319290459156036, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017381373792886734, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.016322564333677292, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17557501792907715, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16488878428936005, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16152255237102509, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.14669077098369598, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0826362892985344, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.07883059978485107, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09191915392875671, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08504322171211243, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08378391712903976, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07447776943445206, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07076825201511383, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04695278778672218, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04085935652256012, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.039786070585250854, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03953268378973007, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023509250953793526, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.020851319655776024, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.020772751420736313, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019239285960793495, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019083350896835327, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012603119015693665, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01314693596214056, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012253363616764545, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009404131211340427, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23976720869541168, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22533291578292847, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22085437178611755, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20058952271938324, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.1125434935092926, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10739172250032425, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12497559934854507, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11558068543672562, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11405351012945175, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10141286998987198, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09624875336885452, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06342510879039764, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.055188968777656555, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.053814731538295746, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.053489167243242264, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03172549232840538, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027463093400001526, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02737206220626831, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02517731301486492, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02496844343841076, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016540154814720154, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016216080635786057, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01607322320342064, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010285249911248684, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.10.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.24668370187282562, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.22172671556472778, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21296949684619904, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18890725076198578, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.1133684292435646, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.1040741428732872, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13077299296855927, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12066683918237686, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.1166936606168747, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09800330549478531, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09170562028884888, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06678932905197144, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05800369009375572, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05466992408037186, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05386482924222946, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.033598754554986954, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.028894206508994102, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.028754284605383873, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.025940796360373497, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.025430411100387573, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018340714275836945, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018849672749638557, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017324022948741913, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013620633631944656, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10698095709085464, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09955638647079468, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09660781174898148, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08695720136165619, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04970904812216759, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04678686708211899, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05667063966393471, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05234924703836441, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05050230771303177, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04433111101388931, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.041942283511161804, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02877996489405632, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.025024792179465294, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02382708340883255, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.023536788299679756, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01439652405679226, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012314525432884693, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012196567840874195, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011232986114919186, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011053528636693954, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007551178336143494, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007619381882250309, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007147498428821564, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0051008835434913635, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.08862046897411346, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08257591724395752, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.07988892495632172, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07196344435214996, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04118692874908447, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03869030252099037, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.04727824777364731, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.043765559792518616, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04184097424149513, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03679204732179642, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03485211357474327, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02399469166994095, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.020896779373288155, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.019746504724025726, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.019464215263724327, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.011999109759926796, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.010176433250308037, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01005906518548727, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009288188070058823, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.009111519902944565, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006277167238295078, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006289439741522074, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.005910738371312618, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004143626894801855, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2145199030637741, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.20019418001174927, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19515137374401093, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.17599007487297058, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10018979758024216, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09482229501008987, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11306013911962509, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10421976447105408, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10171759128570557, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08952400088310242, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08458659797906876, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05741932615637779, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04973624274134636, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04790717363357544, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04746706411242485, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.028660662472248077, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.024423297494649887, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.024260761216282845, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.022224528715014458, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021940898150205612, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014803717844188213, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.014527441002428532, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014117859303951263, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009127825498580933, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2171327918767929, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1933482587337494, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.18222841620445251, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1575273871421814, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10058361291885376, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09045711159706116, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1222766563296318, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11247657239437103, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10440488904714584, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08539149165153503, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07898549735546112, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0631539598107338, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.054750509560108185, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04934065416455269, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04797488451004028, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03179280832409859, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02714383788406849, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026642566546797752, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0243521835654974, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.023526126518845558, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01755138300359249, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.019329864531755447, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01585935615003109, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.015012637712061405, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.17839890718460083, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.16742870211601257, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1638467013835907, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1487390547990799, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.083984375, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08005747199058533, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09349264204502106, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08652810007333755, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08516186475753784, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0756445974111557, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07176090031862259, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04777355492115021, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04160187393426895, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04046286642551422, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.040200624614953995, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.023906217887997627, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02123955450952053, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021161071956157684, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019596271216869354, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019430741667747498, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012792143039405346, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013452433049678802, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012419098988175392, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009668886661529541, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23731885850429535, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22274872660636902, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21824288368225098, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19792336225509644, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11136698722839355, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10617554187774658, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1237550601363182, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11447770893573761, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11291336268186569, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.1002332866191864, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09495401382446289, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06283216178417206, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05466041713953018, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.053260087966918945, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05292476713657379, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.031355664134025574, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027145208790898323, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.027049031108617783, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024853534996509552, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024641957134008408, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016206027939915657, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016009705141186714, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01571544073522091, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010098248720169067, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.11.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2503550052642822, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2246035784482956, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21542063355445862, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.19025984406471252, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11532449722290039, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10551398992538452, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13355183601379395, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12287741899490356, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11877063661813736, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.0991181880235672, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09258250892162323, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06829327344894409, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05914914608001709, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.055604591965675354, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05476909875869751, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.034393273293972015, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029369119554758072, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.029212255030870438, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02622876688838005, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.025704151019454002, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01888101175427437, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.019128184765577316, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01779955066740513, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01377310510724783, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11227592825889587, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10450036823749542, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10146426409482956, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09133529663085938, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.052196938544511795, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.049189865589141846, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0593404583632946, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05484922602772713, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05303511396050453, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.046544477343559265, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04398823529481888, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.030126284807920456, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.026192378252744675, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025007985532283783, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.024717874825000763, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015067371539771557, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012866852805018425, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012753547169268131, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011725591495633125, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011539853177964687, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007862169295549393, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007869357243180275, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007462167646735907, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005165474023669958, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09200771152973175, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08576084673404694, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08301427960395813, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07480848580598831, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04281485080718994, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0402231365442276, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.04904364421963692, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.045394860208034515, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04346894845366478, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.038212958723306656, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03619370609521866, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.024866163730621338, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.021671537309885025, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02050570398569107, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02022450976073742, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.012432130984961987, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.010554314590990543, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.010435272939503193, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009630227461457253, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.009452725760638714, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006495112087577581, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006497016176581383, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006126116961240768, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004254802595824003, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22020521759986877, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.20542825758457184, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.20019882917404175, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1804153025150299, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10290896147489548, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09732170403003693, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11621040850877762, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10716383904218674, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10451450943946838, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09189661592245102, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08680173754692078, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.059088803827762604, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.051226772367954254, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04925599694252014, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.048778776079416275, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.029528087005019188, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02518477849662304, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.025009941309690475, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.022917725145816803, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02262037619948387, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015336116775870323, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015094609931111336, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014629666693508625, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009616998955607414, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22501373291015625, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19963546097278595, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19074706733226776, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1623408943414688, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10452769696712494, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09522978961467743, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12246253341436386, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11177588999271393, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10773161053657532, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08702602982521057, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07936950773000717, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06290743499994278, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05435848608613014, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05107076093554497, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05027253180742264, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03176102042198181, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027810974046587944, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027575377374887466, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.024566201493144035, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02407972700893879, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017785536125302315, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.019174225628376007, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01677936129271984, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014899595640599728, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.18217281997203827, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.17095644772052765, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1673208475112915, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15172336995601654, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08585882186889648, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08182044327259064, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09556163102388382, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08845087885856628, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08706489205360413, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07730016112327576, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07331908494234085, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04883850738406181, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.042569514364004135, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.041405338793992996, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04113202914595604, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.024438349530100822, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021790988743305206, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021706702187657356, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.0201053936034441, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01993998885154724, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013087227940559387, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013872346840798855, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012706953100860119, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01006398443132639, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23963098227977753, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.224978506565094, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2203732281923294, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19986295700073242, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.1125541552901268, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10728651285171509, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12505671381950378, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11569102108478546, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11410597711801529, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10126839578151703, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09592320770025253, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06353067606687546, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05523981899023056, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.053808536380529404, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05346996337175369, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03171953558921814, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027393624186515808, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.027294663712382317, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.025065748021006584, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024848846718668938, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016383493319153786, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01610763743519783, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015884025022387505, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010083239525556564, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.12.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.25154027342796326, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2259560376405716, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21676670014858246, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.19130995869636536, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11593891680240631, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10628197342157364, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.133781298995018, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12339302152395248, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.1193573847413063, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09972064197063446, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09307149797677994, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.0684666559100151, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05936058238148689, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.055975619703531265, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05515109747648239, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03447872772812843, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029646996408700943, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.029499849304556847, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02652326226234436, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02600257284939289, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018896061927080154, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01940600946545601, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017854886129498482, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.014103182591497898, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11722498387098312, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10916773229837418, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10610055923461914, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09553255140781403, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.054603394120931625, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0514584518969059, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06196719408035278, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05726981908082962, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05545901134610176, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0487145371735096, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.046091243624687195, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03150496631860733, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027434606105089188, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026203874498605728, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02591710537672043, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015766151249408722, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013596954755485058, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013482429087162018, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012424427084624767, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012240370735526085, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00829838216304779, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008477027527987957, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007893047295510769, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005784714128822088, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09288042783737183, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08654196560382843, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.0836758017539978, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07538796961307526, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.043128397315740585, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.040483321994543076, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.04970880597829819, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0459674596786499, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04382545128464699, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.038524139672517776, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03651321679353714, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02519507333636284, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.021948641166090965, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02068268321454525, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.020374083891510963, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01260377001017332, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01067098043859005, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01053338497877121, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009743002243340015, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.009546136483550072, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006597895175218582, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006616063881665468, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006196953356266022, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004359805956482887, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2268165946006775, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.21177184581756592, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.20651108026504517, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.186292365193367, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10606466233730316, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10044846683740616, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11964711546897888, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11019571125507355, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10766644030809402, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0947837233543396, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08959624916315079, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06083294376730919, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0526709109544754, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.050739772617816925, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05029188096523285, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0303972065448761, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.025957388803362846, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.025787902995944023, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.023654872551560402, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02335124835371971, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01578085869550705, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015556082129478455, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01506613940000534, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009941277094185352, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23426933586597443, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.20747719705104828, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19824296236038208, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.16511936485767365, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10922840237617493, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09892712533473969, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12742243707180023, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11634383350610733, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1123594269156456, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08986209332942963, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08072156459093094, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06604018807411194, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05694088712334633, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05372113361954689, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05295654013752937, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03360041603446007, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02967214584350586, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029440540820360184, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02614470198750496, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02567412704229355, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01935967244207859, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.020889990031719208, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.018437789753079414, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0166962668299675, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.18291781842708588, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.17172032594680786, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1681012362241745, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15248344838619232, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08630619198083878, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08224684000015259, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09599389135837555, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08885878324508667, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08750861883163452, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07768819481134415, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07370760291814804, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04909480735659599, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04277675226330757, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.041615694761276245, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04134557023644447, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.024597104638814926, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021888088434934616, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02180180512368679, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.020194316282868385, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.020025109872221947, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013212406076490879, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013921689242124557, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012840287759900093, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010079296305775642, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.244455486536026, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22950540482997894, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22484749555587769, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2040332704782486, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11488618701696396, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10959360003471375, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1276695728302002, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11807432770729065, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.1164940744638443, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10335569828748703, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09796813875436783, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06489415466785431, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05638648942112923, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.054945990443229675, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.054605431854724884, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.032421402633190155, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027992891147732735, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02789345569908619, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.025615934282541275, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02540053054690361, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01683071441948414, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016474943608045578, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016346504911780357, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010354593396186829, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.13.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.253544420003891, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2279108613729477, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21874164044857025, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.19348368048667908, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11669921875, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10707343369722366, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1352653056383133, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12434680014848709, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.12017995119094849, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.100568987429142, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09405754506587982, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06890158355236053, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05964066460728645, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05620997026562691, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05537986755371094, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03459196537733078, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029544131830334663, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02938597835600376, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.026400065049529076, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.025869403034448624, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018692268058657646, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01905296929180622, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01759544014930725, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013509631156921387, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12081502377986908, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11269561201334, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10951083153486252, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09879304468631744, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05627354234457016, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.053117457777261734, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0638744980096817, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.058993805199861526, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05715888738632202, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05028766766190529, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04758182168006897, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0324438102543354, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.028194265440106392, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026961561292409897, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02665739133954048, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016213374212384224, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013887898996472359, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013771064579486847, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012678587809205055, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012493206188082695, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00845780037343502, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008500361815094948, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008034941740334034, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0056058610789477825, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09507981687784195, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08868998289108276, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08584577590227127, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07743163406848907, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04422326385974884, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04156462475657463, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05078519508242607, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04702853411436081, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.044898394495248795, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.039570242166519165, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03749269247055054, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02571571245789528, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02243625558912754, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02118033543229103, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02087792009115219, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.012855758890509605, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.010884668678045273, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.010752960108220577, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009946444071829319, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.009753297083079815, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006702195852994919, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0066878776997327805, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006305420305579901, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004337552934885025, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.23184818029403687, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.21675032377243042, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2112697958946228, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19074462354183197, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10864022374153137, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10280254483222961, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12298182398080826, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11323688179254532, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11027973890304565, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09728037565946579, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09205327183008194, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06249339506030083, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.054179854691028595, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.051988840103149414, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05146446079015732, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.031244369223713875, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026550740003585815, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026337815448641777, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02420901693403721, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.023866087198257446, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016201414167881012, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015861164778470993, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015418805181980133, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009991721250116825, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2339770793914795, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.21233579516410828, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2053167223930359, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.18031182885169983, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1092187687754631, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10113292932510376, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12497623264789581, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11425342410802841, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11171749979257584, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09351387619972229, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08688518404960632, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06467458605766296, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05573531985282898, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05348524823784828, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.052924174815416336, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.032934799790382385, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.029170112684369087, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02901487797498703, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02623670920729637, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02590164728462696, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.019018111750483513, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.019998323172330856, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01833765022456646, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.015724636614322662, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.19082221388816833, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1790861189365387, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.17534977197647095, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1591172218322754, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0900077223777771, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08575859665870667, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10017251968383789, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09271277487277985, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09125366061925888, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08097758144140244, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07678188383579254, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.051158297806978226, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.044544827193021774, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04333215579390526, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04304558411240578, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.025592433288693428, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.0226637814193964, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.022582007572054863, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.020872190594673157, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.020698923617601395, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01364094391465187, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014241568744182587, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013244765810668468, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010113871656358242, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24891018867492676, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2337576150894165, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22900909185409546, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2078142613172531, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11709952354431152, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11167337000370026, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13008081912994385, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1203593984246254, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11873605847358704, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10539183020591736, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09987315535545349, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06611162424087524, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05750814452767372, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05602762848138809, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.055674243718385696, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.033002641052007675, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02855052798986435, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02844863384962082, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026127498596906662, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025902388617396355, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017066096886992455, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01681419275701046, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01655932143330574, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010582289658486843, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.14.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.25685736536979675, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2304421216249466, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.22098837792873383, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.19524477422237396, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11810273677110672, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10817042738199234, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13674551248550415, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12598352134227753, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.12171807140111923, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10161731392145157, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09489630162715912, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.0697554424405098, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.060380712151527405, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.056798893958330154, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05593092739582062, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03504639118909836, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029674360528588295, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02952074259519577, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.026433326303958893, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.025884337723255157, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01888405717909336, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018926668912172318, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01774810627102852, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013160591013729572, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11554422229528427, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10785578936338425, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10486491024494171, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09464170038700104, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.053935516625642776, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05091678723692894, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06123383343219757, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05656818673014641, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.054768383502960205, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04824273660778999, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.045707736164331436, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.031135693192481995, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027053439989686012, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02584093250334263, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.025554100051522255, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01556365005671978, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013295235112309456, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013180544599890709, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012141636572778225, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01195960957556963, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008126834407448769, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00812595710158348, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0077238986268639565, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005335223861038685, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09456785023212433, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.0882074311375618, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08539272844791412, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07705125212669373, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04403655230998993, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04138384386897087, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05050643905997276, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04677996784448624, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04471530765295029, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03939549997448921, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.037342339754104614, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02558629959821701, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02233484946191311, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.021098429337143898, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.020808767527341843, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.012795629911124706, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.010865403339266777, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.010735542513430119, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009929688647389412, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.009740473702549934, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006682905834168196, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0067002237774431705, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006293997168540955, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004390830174088478, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.236666738986969, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2214672565460205, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.21605989336967468, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19530925154685974, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11108480393886566, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10526417940855026, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12537027895450592, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11545956879854202, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11274831742048264, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09951465576887131, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09421578794717789, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06375624239444733, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05522727221250534, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05317969247698784, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05267927423119545, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.031850822269916534, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027148345485329628, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026967959478497505, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02476140484213829, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024452118203043938, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016506794840097427, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01621554046869278, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01575065217912197, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010277196764945984, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21559123694896698, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19190852344036102, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1809845119714737, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15948486328125, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09926893562078476, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08943706750869751, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12153902649879456, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11144188046455383, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10355468839406967, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08561381697654724, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08008405566215515, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06252458691596985, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05396665632724762, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.048379894345998764, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.046978794038295746, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03152204677462578, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026082642376422882, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.025583846494555473, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02349800243973732, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.022643472999334335, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017195789143443108, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018042849376797676, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015415111556649208, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013412097468972206, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.19110101461410522, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.17948701977729797, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1757057011127472, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15958033502101898, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09021422266960144, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08598122000694275, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10032670199871063, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09284850209951401, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09144960343837738, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08122435212135315, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07707522064447403, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.051227644085884094, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04467193782329559, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.043474625796079636, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04319486394524574, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.025648178532719612, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.022825120016932487, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.0227397121489048, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02105872519314289, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.020878758281469345, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013723362237215042, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014460649341344833, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013333902694284916, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01040048711001873, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2518633306026459, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23662078380584717, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23186056315898895, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2104964256286621, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.1185324490070343, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11311262845993042, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13167357444763184, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1217888668179512, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12017787992954254, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10674212127923965, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10117664933204651, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0669272169470787, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.058203741908073425, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.056727707386016846, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0563800111413002, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.033426303416490555, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02892441116273403, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.028820926323533058, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026482628658413887, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026260877028107643, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017344092950224876, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017054211348295212, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016838818788528442, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010778502561151981, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.15.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2562080919742584, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2296588271856308, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21981334686279297, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.19399577379226685, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11777666956186295, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10767614841461182, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13700154423713684, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12620042264461517, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.12152200937271118, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10117480903863907, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09460276365280151, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06990815699100494, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06048934534192085, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.056678593158721924, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05576742812991142, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.0351363942027092, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029680004343390465, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.029492167755961418, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02642558515071869, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02581942267715931, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018955348059535027, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01904265210032463, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017746450379490852, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013263767585158348, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11928191035985947, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11139228194952011, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10840637236833572, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09790258854627609, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.055725425481796265, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05266408994793892, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06316057592630386, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05832820013165474, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.056552618741989136, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0498817004263401, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.047273069620132446, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0321221686899662, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02792176976799965, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026732776314020157, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.026453226804733276, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016073908656835556, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013829261064529419, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013719117268919945, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012660189531743526, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012482875026762486, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00843781791627407, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008554312400519848, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0080419285222888, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0057661207392811775, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09514188021421432, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08880572766065598, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08602543920278549, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07761967182159424, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04427964240312576, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0416732057929039, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05068761855363846, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04699086397886276, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.044974435120821, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.039653562009334564, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.037567611783742905, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02570563182234764, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.022436343133449554, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.021228140220046043, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02093128114938736, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01285263616591692, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0109258983284235, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.010801145806908607, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.009990795515477657, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.009804870933294296, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006717904470860958, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006725504528731108, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0063381437212228775, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004403219558298588, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2441459447145462, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22871604561805725, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22319519519805908, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20208534598350525, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11469075083732605, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.1088615283370018, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12915652990341187, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11899039149284363, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11634697020053864, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10285162925720215, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09747910499572754, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06564982235431671, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05689206346869469, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05487518385052681, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05440696328878403, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03276646137237549, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028006497770547867, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027823131531476974, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02557414397597313, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025266552343964577, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016951946541666985, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016659021377563477, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016206905245780945, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010501392185688019, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2296409159898758, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2047640085220337, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.19703388214111328, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.17123879492282867, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10583438724279404, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09622932970523834, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12337808310985565, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11184009909629822, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10885501652956009, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08886480331420898, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08157160133123398, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06332339346408844, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05443662032485008, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05176118388772011, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.051108960062265396, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.032380178570747375, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028325645253062248, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028157806023955345, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025201858952641487, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024813085794448853, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018734315410256386, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0195845328271389, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01795647107064724, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01542739849537611, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.18758732080459595, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.17618732154369354, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1725110113620758, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15661293268203735, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08852224051952362, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08436935395002365, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09847801923751831, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09108178317546844, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08975251764059067, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07970767468214035, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07566047459840775, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.050269439816474915, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.043785497546195984, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.042614445090293884, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.042342495173215866, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.025166712701320648, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.022293437272310257, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.022209545597434044, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02054746262729168, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.020376021042466164, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013430018909275532, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014018825255334377, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013047787360846996, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00996805913746357, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2508592903614044, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2357470840215683, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23101796209812164, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2097354531288147, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.1180514320731163, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11265130341053009, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13117381930351257, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12127166986465454, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11966273188591003, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10630355775356293, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10088033229112625, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06671030074357986, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05793868005275726, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05647503212094307, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0561298206448555, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.033345162868499756, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02879044972360134, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.0286924596875906, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026368875056505203, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02614620327949524, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017366549000144005, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016968773677945137, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.0168714951723814, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010705122724175453, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.16.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2530781030654907, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.22686634957790375, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2171534299850464, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.19185364246368408, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11630519479513168, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10628801584243774, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13526780903339386, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12457320839166641, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11992590129375458, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09998904913663864, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09350917488336563, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06909044831991196, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05972577631473541, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05596473067998886, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.055043816566467285, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03474162518978119, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02927457168698311, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.029086001217365265, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.026064829900860786, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02547604963183403, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01880848966538906, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01875882036983967, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01760523021221161, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013044417835772038, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1189582347869873, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11124701052904129, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10822567343711853, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09776890277862549, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05556543543934822, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05252247303724289, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06316597014665604, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.058284275233745575, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.056399326771497726, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04982483759522438, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04733894020318985, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.032118700444698334, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02789050154387951, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026640133932232857, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02633814699947834, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016063913702964783, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01371084526181221, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01359029021114111, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01254737377166748, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012361555360257626, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008384068496525288, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00838301982730627, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007976103574037552, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005513036157935858, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09449250251054764, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.0882381796836853, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08542747050523758, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07715067267417908, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04402455314993858, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04139471799135208, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0505792535841465, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04686172306537628, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04469811171293259, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03945361077785492, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03744116052985191, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.025651659816503525, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02237808331847191, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02111203409731388, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.020804105326533318, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01282900758087635, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.010879223234951496, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.010745465755462646, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.00995807908475399, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.009764954447746277, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0067134094424545765, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006728886626660824, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006316459737718105, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004423720762133598, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24542368948459625, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.23011286556720734, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2247505635023117, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20362535119056702, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11531589180231094, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10957825928926468, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12975604832172394, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11944294720888138, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11697649955749512, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10357238352298737, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09817352890968323, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0659705102443695, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.057091161608695984, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05518430471420288, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05472322553396225, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03293915092945099, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028124690055847168, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027955347672104836, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02570200338959694, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02540656179189682, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0170323196798563, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016667906194925308, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016298450529575348, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010449402965605259, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22112934291362762, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19453363120555878, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.18549005687236786, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15301813185214996, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10237224400043488, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09309789538383484, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12002831697463989, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10983220487833023, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10580436885356903, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0837927907705307, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07468913495540619, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.061769675463438034, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.053444791585206985, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05009687691926956, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.049270760267972946, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.031194815412163734, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02727205865085125, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02703068032860756, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02381499484181404, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.023305295035243034, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017482906579971313, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01881248876452446, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01645158790051937, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014607276767492294, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.19309647381305695, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.18129952251911163, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.17762041091918945, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.16143834590911865, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09108419716358185, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08685406297445297, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10128583014011383, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09371032565832138, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09235866367816925, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08203942328691483, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07787664234638214, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05165334790945053, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.044991232454776764, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04380892962217331, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0435258112847805, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02589333988726139, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.022815439850091934, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.022735990583896637, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02101173810660839, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.0208412017673254, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013824410736560822, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014191883616149426, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013445024378597736, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009920410811901093, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.25543755292892456, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2401346117258072, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23529022932052612, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.21375113725662231, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.12022077292203903, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11474734544754028, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13361568748950958, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12346813827753067, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12187576293945312, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10832413285970688, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10280376672744751, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06787365674972534, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.059012770652770996, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05753672122955322, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.057190317660570145, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.034001220017671585, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02936328575015068, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02926257997751236, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026913052424788475, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026689477264881134, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017830152064561844, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017350779846310616, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.017350051552057266, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011019328609108925, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.17.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2568312883377075, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.23037919402122498, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2204909473657608, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.19509099423885345, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.1180994063615799, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10788863152265549, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1373567283153534, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12668845057487488, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.12177015095949173, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10175637900829315, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.0952117070555687, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07006736844778061, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06085188686847687, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05690259113907814, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05593835562467575, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.035183943808078766, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029901444911956787, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02968573570251465, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02671149931848049, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02609170228242874, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018739063292741776, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.019345929846167564, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017398197203874588, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013628083281219006, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12025976181030273, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11254678666591644, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10949552804231644, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09905340522527695, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05626649782061577, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05322134122252464, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06392154842615128, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05899703875184059, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.057088837027549744, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05054287612438202, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04800882935523987, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03248325362801552, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02823379822075367, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0269752386957407, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.026674408465623856, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016259800642728806, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013899738900363445, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013779195956885815, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01274050585925579, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012549679726362228, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00850875023752451, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008520198054611683, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008102919906377792, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005629071965813637, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09893360733985901, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09254290163516998, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08973587304353714, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08116074651479721, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04619533568620682, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04353492334485054, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.052867989987134933, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04894449934363365, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.046885453164577484, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04145864397287369, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03936781734228134, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.026812611147761345, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02337256819009781, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.022136293351650238, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.021832002326846123, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013403520919382572, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011375238187611103, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011246787384152412, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010416178964078426, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010229872539639473, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006992480717599392, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0069695208221673965, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00660461001098156, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004528046119958162, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.25282204151153564, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.23717860877513885, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.23186686635017395, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21021775901317596, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11887159198522568, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11306559294462204, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13336367905139923, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12287820875644684, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12054809927940369, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10686388611793518, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10136117041110992, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06782374531030655, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05870788171887398, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.056845251470804214, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.056402452290058136, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03384782001376152, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028895149007439613, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02873777225613594, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.026397576555609703, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026115022599697113, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01740594580769539, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016973312944173813, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01668953150510788, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010469950735569, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2346895933151245, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2107866108417511, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.20225460827350616, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1707243174314499, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10971605032682419, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10091342031955719, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12689663469791412, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11665146797895432, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11264017224311829, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09164673835039139, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.08204078674316406, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0655515044927597, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05677320435643196, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.053592976182699203, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.052827998995780945, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03314604610204697, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02916131727397442, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02892022393643856, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025788143277168274, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025305524468421936, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01870681904256344, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.020021479576826096, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017765695229172707, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.015550503507256508, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.19981209933757782, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.18764983117580414, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.183819979429245, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.16689378023147583, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09426754713058472, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0898754671216011, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10462195426225662, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09690746665000916, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09556756913661957, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08484365791082382, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08046987652778625, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.053383927792310715, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.046499647200107574, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04529722407460213, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04501044750213623, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.026676833629608154, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02352437749505043, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.023441556841135025, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.021643422544002533, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02146657183766365, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014093958772718906, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014536936767399311, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013701751828193665, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01006422657519579, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.26206448674201965, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.24627740681171417, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.24140970408916473, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.21923433244228363, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.12342868745326996, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11779516190290451, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13680501282215118, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12673325836658478, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12512393295764923, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.11119048297405243, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.1053926944732666, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0695260614156723, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0605405792593956, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.059044159948825836, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.058687712997198105, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03470690920948982, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.030072089284658432, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02997358702123165, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02754330076277256, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.027316248044371605, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017930403351783752, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017685458064079285, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.017421754077076912, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011103599332273006, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.18.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.26018333435058594, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2330775111913681, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2227766364812851, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.19729556143283844, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11953706294298172, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.1090502142906189, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1395472139120102, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12868112325668335, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.12334822118282318, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.1029566302895546, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09648680686950684, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.0710568055510521, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06162674352526665, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.057476241141557693, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.056476570665836334, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03559219464659691, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02999056689441204, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02977048233151436, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.026717612519860268, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02606634609401226, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01891496032476425, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.019148685038089752, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01757359318435192, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013143932446837425, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11937963962554932, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11163453757762909, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10848893225193024, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09805610775947571, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.055855199694633484, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05274537205696106, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06362483650445938, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05869007110595703, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05666886270046234, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05007673799991608, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04760701581835747, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03238557279109955, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.028110170736908913, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026787031441926956, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02647782489657402, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01621398888528347, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013847991824150085, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013721471652388573, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012691839598119259, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012494302354753017, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008523036725819111, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008572513237595558, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008100143633782864, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005751615855842829, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.0982118546962738, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09175053983926773, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.088788241147995, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08022430539131165, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.045827869325876236, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04306976497173309, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.052716534584760666, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04883498698472977, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04652004316449165, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04106810316443443, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03897618129849434, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.026709001511335373, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02330916002392769, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.021957460790872574, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.021630093455314636, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013357840478420258, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011301884427666664, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011156812310218811, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01034344732761383, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010138137266039848, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006992454640567303, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0069749196991324425, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0065774270333349705, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0045559764839708805, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2494666576385498, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2338763326406479, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2284860461950302, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20708918571472168, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1172717809677124, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.1114351823925972, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1318255215883255, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12144163250923157, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11892306804656982, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10530593246221542, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09984228760004044, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06702195107936859, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05797066539525986, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.056058935821056366, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.055607669055461884, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03344229981303215, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028548527508974075, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028399011120200157, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.026071134954690933, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02578955888748169, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017229434102773666, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016851555556058884, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01650231145322323, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010497505776584148, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2113293558359146, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18626397848129272, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1780509054660797, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15273457765579224, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09823688864707947, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08886182308197021, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11389487236738205, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10404501110315323, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10122012346982956, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08085983246564865, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07474619895219803, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.058822065591812134, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.050580114126205444, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.047986533492803574, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04734441637992859, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.029886219650506973, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02610807865858078, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.025939183309674263, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.022879768162965775, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.022480767220258713, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017007917165756226, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01787705346941948, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016254141926765442, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013915101997554302, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.19935698807239532, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1872551441192627, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1834917515516281, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.16670037806034088, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09399676322937012, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08967357873916626, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10456879436969757, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.0966247096657753, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.0953165739774704, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0846666544675827, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0804324522614479, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05326501652598381, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0463552325963974, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04518057033419609, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04490537941455841, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.026700492948293686, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.023547085002064705, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.023464567959308624, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02168572135269642, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02150728926062584, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014275944791734219, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01464614924043417, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01389379147440195, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010246425867080688, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2564825117588043, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2410866767168045, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23641426861286163, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2147848904132843, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.12079010158777237, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11533714085817337, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13402937352657318, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12401383370161057, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12246056646108627, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10884801298379898, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10324320942163467, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06813682615756989, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05925900861620903, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05779552459716797, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05744723975658417, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03408767655491829, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02950144372880459, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.029404746368527412, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.027039263397455215, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02681839093565941, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017819436267018318, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01742260530591011, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.017338743433356285, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011053109541535378, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.19.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.252890944480896, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.22556662559509277, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2148679494857788, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.19019891321659088, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11595078557729721, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10523789376020432, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13611651957035065, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12564052641391754, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11992666125297546, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09957528859376907, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.0934062972664833, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.0693584606051445, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06012449041008949, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05572693049907684, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.054654136300086975, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03468606621026993, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029028598219156265, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.028782254084944725, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.025790758430957794, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.025093885138630867, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018396226689219475, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018532823771238327, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016980597749352455, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01260119117796421, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11544294655323029, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10796855390071869, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10488123446702957, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09488770365715027, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05389458313584328, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05089356750249863, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06142077594995499, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.056766316294670105, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0546988770365715, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.048406269401311874, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.045993681997060776, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.031175222247838974, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027124108746647835, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025826916098594666, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.025514768436551094, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015584815293550491, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013281824998557568, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01315518282353878, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012169260531663895, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011971971020102501, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00815228745341301, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008134724572300911, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007730966433882713, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00532523263245821, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09696703404188156, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09061149507761002, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08764055371284485, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07926090061664581, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04519518092274666, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04249276965856552, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05205417796969414, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.048157643526792526, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.045897457748651505, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04055428504943848, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03852253034710884, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.026374202221632004, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02300281822681427, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.021656803786754608, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.021335449069738388, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013181159272789955, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01113955769687891, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.010991878807544708, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010198330506682396, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.009992517530918121, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006889715790748596, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006869862787425518, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006471666041761637, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004473057575523853, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24142110347747803, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.22622641921043396, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22095747292041779, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.2002340406179428, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11321847885847092, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.1075143814086914, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1274760514497757, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11746623367071152, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11483486741781235, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10166071355342865, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09647941589355469, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0647265836596489, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.056059177964925766, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.054104384034872055, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.053642984479665756, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0323113277554512, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027547061443328857, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027381639927625656, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025168726220726967, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.024870825931429863, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01666794717311859, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016284964978694916, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01593857817351818, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010133528150618076, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2195887416601181, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.19031688570976257, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1798030436038971, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.15259455144405365, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10049599409103394, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09008193016052246, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12010039389133453, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10960853844881058, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10516944527626038, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08256629854440689, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07549376785755157, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06185879185795784, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05322302505373955, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0491207093000412, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.048090554773807526, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.031488172709941864, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026776757091283798, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02650226093828678, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.023450808599591255, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.022830693051218987, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018062006682157516, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01861775666475296, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01685868389904499, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01437846664339304, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.19604448974132538, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.18422836065292358, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1804381161928177, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.16405634582042694, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09230384975671768, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08806969225406647, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1026366725564003, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09488064795732498, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09358273446559906, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0832076370716095, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07904741168022156, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05228149890899658, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04548259079456329, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.044325586408376694, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04405597969889641, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.026197291910648346, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.023045632988214493, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.022966036573052406, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.021221643313765526, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02104736864566803, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01396613847464323, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014250610955059528, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013583862222731113, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009877137839794159, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24986714124679565, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2348884642124176, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23026646673679352, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20931342244148254, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11747243255376816, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11216666549444199, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1304405927658081, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12063442170619965, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11911191791296005, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10592077672481537, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10063426196575165, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06621626019477844, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.057619426399469376, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.0562036857008934, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05586521327495575, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03311673551797867, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028725702315568924, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.028630610555410385, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02635398507118225, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026133181527256966, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017250491306185722, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017015114426612854, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016777897253632545, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010866408236324787, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.20.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2534310519695282, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.22550876438617706, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21480004489421844, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1903998851776123, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11617211252450943, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10540414601564407, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1370779275894165, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12572172284126282, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.12021192908287048, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09963976591825485, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09352896362543106, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.0697709172964096, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06030182167887688, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05602242425084114, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05496268346905708, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03504244610667229, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029507452622056007, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.029279902577400208, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.026298044249415398, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.025620440021157265, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018931975588202477, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01924952119588852, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017531976103782654, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.013631097041070461, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11644972860813141, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10891712456941605, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10583965480327606, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09580491483211517, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.054375264793634415, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.051368068903684616, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06194952502846718, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05721382051706314, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.055181585252285004, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0488475039601326, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.046455226838588715, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0314815491437912, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0273516196757555, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026065556332468987, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.025758910924196243, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01575302705168724, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013442381285130978, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013314349576830864, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012325815856456757, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012134830467402935, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008270957507193089, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008275698870420456, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007862940430641174, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005490310490131378, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09691792726516724, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09058626741170883, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08751638233661652, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07912418991327286, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04510011896491051, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.042374011129140854, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05212927609682083, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.048278920352458954, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04578842595219612, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04049529507756233, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03850046545267105, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.026406727731227875, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.023042449727654457, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.021625006571412086, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.021286621689796448, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01322330441325903, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011160661466419697, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011006132699549198, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010229682549834251, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010014854371547699, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0069551775231957436, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006939592305570841, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006521426606923342, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0045747836120426655, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24904102087020874, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.23348215222358704, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2282581627368927, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20694832503795624, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11682284623384476, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11109502613544464, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1310618668794632, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12071239203214645, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11847308278083801, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10496827960014343, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0996406301856041, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06660956144332886, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05761711299419403, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.055797088891267776, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05537157133221626, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.033225949853658676, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028384797275066376, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028244439512491226, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02593761868774891, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025669598951935768, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01711886003613472, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01669669710099697, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016429444774985313, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010345798917114735, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20050941407680511, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17663106322288513, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16969788074493408, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.13982385396957397, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09288278967142105, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08465413749217987, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10597377270460129, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09746941179037094, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09563542157411575, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07503718882799149, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06773560494184494, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05469396710395813, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04758088290691376, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.045563772320747375, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04508444666862488, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0277864970266819, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02513512410223484, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02502068318426609, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02188977412879467, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021582357585430145, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015938065946102142, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01753499172627926, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015352710150182247, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01402140874415636, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.18338240683078766, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.172341987490654, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1689501255750656, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15360015630722046, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08625727146863937, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08234500885009766, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09568674862384796, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.0886024758219719, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08743371069431305, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0778140127658844, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07389906048774719, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04873557388782501, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.042431242763996124, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.041369177401065826, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04111630842089653, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02435721643269062, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021415583789348602, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021342331543564796, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019712358713150024, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019553590565919876, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012843504548072815, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013114351779222488, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012498104013502598, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008943254128098488, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23583506047725677, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22190341353416443, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.21760636568069458, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1978944092988968, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11086094379425049, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10591643303632736, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.122831791639328, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11374873667955399, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11237410455942154, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10008057951927185, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09494855999946594, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.062409527599811554, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0542965866625309, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05297539383172989, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.052662719041109085, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.031139178201556206, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.026966912671923637, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02688075229525566, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.024730298668146133, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024538280442357063, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016094421967864037, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.015809835866093636, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015656983479857445, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009873485192656517, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.21.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.23746225237846375, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2112555354833603, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20101483166217804, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.17823658883571625, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.1085096076130867, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09833641350269318, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12828299403190613, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11784277111291885, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11237339675426483, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09320327639579773, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08773474395275116, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06529088318347931, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.056478701531887054, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05225721001625061, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05122670531272888, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03279566019773483, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02745724283158779, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02722550928592682, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.024479232728481293, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.023821812123060226, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017758334055542946, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017860669642686844, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016428256407380104, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012528790161013603, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11921454221010208, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11190035194158554, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1090402752161026, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09902963787317276, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05580107495188713, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05293748527765274, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06299173831939697, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.058296844363212585, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05658213421702385, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0502651184797287, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04777274280786514, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.031936679035425186, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02782292291522026, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026694880798459053, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.026424488052725792, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015965698286890984, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013651998713612556, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013546470552682877, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012522934935986996, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012352153658866882, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00831446424126625, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008205024525523186, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007943947799503803, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005238779820501804, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10416638106107712, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09768043458461761, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09500624239444733, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08625224977731705, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04867640882730484, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.046099703758955, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.055395931005477905, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05115913972258568, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04937247931957245, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.043860867619514465, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.041791610419750214, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.028094343841075897, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.024411382153630257, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02329801581799984, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02303493209183216, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014029081910848618, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011920992285013199, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011807960458099842, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010937705636024475, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010767065919935703, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007288873661309481, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0071948920376598835, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006925664842128754, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004583531059324741, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24810442328453064, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.23331642150878906, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22847436368465424, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20756860077381134, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11653759330511093, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11115104705095291, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1301591694355011, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12009541690349579, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11812536418437958, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10506093502044678, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09982535988092422, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06608898937702179, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05729755759239197, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05568511411547661, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.055294208228588104, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03297596424818039, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02829873189330101, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0281688142567873, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02592010237276554, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025668339803814888, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016977675259113312, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01656017266213894, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016375679522752762, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010217228904366493, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.22667351365089417, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1905261129140854, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.17822110652923584, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14726103842258453, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10498055070638657, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.09115125983953476, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12277896702289581, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11289548873901367, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10881376266479492, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08046895265579224, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07233253866434097, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06318014115095139, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05468842759728432, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05100802704691887, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05011134222149849, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03174571320414543, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02740437537431717, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027142690494656563, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02281022071838379, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.022207602858543396, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017471926286816597, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018514618277549744, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016334140673279762, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013905437663197517, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1842491328716278, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.17328345775604248, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1698862761259079, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1544850766658783, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08667843043804169, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08280643820762634, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.0961015522480011, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08898120373487473, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08785775303840637, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07824498414993286, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07431846857070923, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04894787073135376, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0426044687628746, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.041558291763067245, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04131026193499565, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.024460673332214355, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021492553874850273, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021423792466521263, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01979195512831211, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019639121368527412, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.012881441973149776, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013127228245139122, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01253390870988369, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.008912255056202412, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23885856568813324, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22480729222297668, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22057442367076874, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20069414377212524, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11223683506250381, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10725262761116028, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12454939633607864, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1151084378361702, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11373676359653473, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.101357601583004, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09640190750360489, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06327013671398163, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05495944619178772, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05365827679634094, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05334097146987915, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.031642116606235504, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027378316968679428, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02729097008705139, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02513260394334793, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.024934669956564903, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01650497503578663, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01614684797823429, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016071688383817673, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010228515602648258, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.22.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.23708830773830414, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21131131052970886, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20086757838726044, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.17874141037464142, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10845368355512619, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09817934036254883, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1286328136920929, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11831526458263397, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11226484179496765, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09351257234811783, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08819102495908737, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06545090675354004, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05672488734126091, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05224498361349106, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05114072561264038, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.032769620418548584, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027416307479143143, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.027135590091347694, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.0244887862354517, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02377908304333687, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017502950504422188, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017828114330768585, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01604568213224411, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01241213083267212, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11654741317033768, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10923560708761215, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10631068795919418, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09646311402320862, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05448243021965027, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05160946026444435, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06165703013539314, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05706344544887543, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05526794120669365, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04901923984289169, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04659159481525421, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03128829598426819, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027273228392004967, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026078464463353157, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.025796370580792427, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01564849354326725, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013370967470109463, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013254520483314991, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012256519868969917, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01207665540277958, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008156422525644302, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008098093792796135, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007768470793962479, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0052297296933829784, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09771084040403366, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09157706797122955, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08886723220348358, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08063369244337082, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04566366225481033, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04311516880989075, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05229732021689415, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04830165207386017, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04632076248526573, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04110269248485565, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03913578391075134, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.026519695296883583, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.023067425936460495, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02186393551528454, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.021575840190052986, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013257438316941261, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011207147501409054, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011080851778388023, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010280627757310867, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010098965838551521, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006901971064507961, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006824213080108166, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006514572538435459, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004384731873869896, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24689272046089172, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.23183241486549377, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2268953174352646, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.2061193734407425, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11587799340486526, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11042680591344833, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12964102625846863, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11952824145555496, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11752752214670181, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10442662984132767, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09918384253978729, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06581509858369827, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05702439323067665, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.055377811193466187, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.054975636303424835, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03284354507923126, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028148414567112923, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028018180280923843, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02576320990920067, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025517567992210388, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016884103417396545, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016503797844052315, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016252048313617706, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010192707180976868, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20694918930530548, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1813988983631134, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.17325809597969055, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14567209780216217, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0958050787448883, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08635340631008148, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11022452265024185, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10147101432085037, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09911032021045685, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07808124274015427, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.069933220744133, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05661720037460327, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04896542802453041, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04639040306210518, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04576290398836136, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.028452496975660324, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.024633336812257767, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.024485910311341286, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02121923491358757, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02081386372447014, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015510809607803822, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016168875619769096, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01470470242202282, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011862224899232388, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.18825308978557587, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1770857274532318, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.17360453307628632, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.15799330174922943, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08854996412992477, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08458781987428665, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.09815716743469238, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09087736904621124, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08974795043468475, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07997135818004608, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07599038630723953, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.049959927797317505, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04349929094314575, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.042448051273822784, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04219724237918854, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02497134916484356, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021930577233433723, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021860523149371147, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.020198078826069832, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.020043812692165375, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013150832615792751, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013366686180233955, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012805521488189697, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009048090316355228, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24018584191799164, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2261182963848114, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22171632945537567, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20190607011318207, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11287558078765869, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10791009664535522, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12502795457839966, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11576387286186218, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11440367996692657, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10196279734373093, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09683696925640106, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06351625174283981, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05526066571474075, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05394267290830612, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05363285541534424, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03169751912355423, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027453115209937096, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.027366556227207184, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.025194860994815826, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02499311789870262, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016400832682847977, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01608772575855255, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01596328616142273, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01003793440759182, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.23.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.23960146307945251, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2142992615699768, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20393827557563782, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18168318271636963, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10968166589736938, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09968499839305878, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12980566918849945, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11945431679487228, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11348049342632294, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09486694633960724, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08938933908939362, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06597423553466797, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05715787410736084, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.052769917994737625, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.051692843437194824, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03323593735694885, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027602873742580414, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.027325639501214027, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02469184249639511, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.023998960852622986, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018059201538562775, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017801987007260323, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01669928804039955, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01225607842206955, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11663922667503357, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10940247774124146, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10653311014175415, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0966690257191658, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.054571732878685, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.051682088524103165, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06209429353475571, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.057242367416620255, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05535656213760376, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04916291683912277, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04680969566106796, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03149524703621864, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02734978124499321, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026143327355384827, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.025853820145130157, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015780286863446236, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013415385968983173, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013299320824444294, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012314802967011929, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012128939852118492, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008265558630228043, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008142491802573204, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007865089923143387, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005275167990475893, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10001832246780396, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09380242973566055, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09117339551448822, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08271492272615433, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.046788182109594345, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04422878101468086, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.053328368812799454, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.04930195212364197, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04744540527462959, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04214150458574295, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.040096964687108994, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.027043435722589493, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02352948486804962, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.022393127903342247, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02212492749094963, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013512316159904003, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01145522017031908, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01134092453867197, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010509295389056206, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010335423052310944, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007019097916781902, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0069275544956326485, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006650762166827917, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004410238936543465, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24487817287445068, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.23004746437072754, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22524333000183105, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20473293960094452, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11505302786827087, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10965509712696075, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12866675853729248, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11868224292993546, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11659876257181168, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10371901839971542, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0985865667462349, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06535334885120392, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.056620512157678604, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05496459826827049, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.054571110755205154, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03260112926363945, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.027934059500694275, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.027799587696790695, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.025587450712919235, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025319794192910194, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01677025482058525, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0163680799305439, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016142085194587708, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010092625394463539, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.19123677909374237, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1634690761566162, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.15588417649269104, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1282113492488861, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.08683440089225769, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07725473493337631, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10090532153844833, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09103380143642426, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.08929765969514847, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06821606308221817, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06307511776685715, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05164637416601181, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04498701170086861, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0430835522711277, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04262296482920647, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.026983989402651787, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02444053627550602, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.024318233132362366, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.021221837028861046, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.020928405225276947, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016359852626919746, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017774386331439018, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015799732878804207, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014763436280190945, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.19535520672798157, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1838672161102295, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.18024347722530365, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1640768200159073, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09196676313877106, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08786151558160782, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10197518020868301, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09437649697065353, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09320257604122162, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08304624259471893, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07892627269029617, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05192412808537483, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04516665264964104, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04406696557998657, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.043815258890390396, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.025957323610782623, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02273249626159668, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.022659247741103172, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02092665620148182, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.020766958594322205, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013671427965164185, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01379817072302103, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01331346109509468, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009267308749258518, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24438074231147766, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23007187247276306, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22567489743232727, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20550112426280975, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11495191603899002, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10988643765449524, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12743891775608063, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11790098994970322, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11650742590427399, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10384204983711243, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0987391471862793, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0648748055100441, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.056323833763599396, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05498533323407173, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05466713756322861, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03245928883552551, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02808116376399994, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.027989918366074562, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02579481527209282, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025594670325517654, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01698172651231289, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016599755734205246, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01654762774705887, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010582459159195423, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.24.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2390538603067398, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21403659880161285, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20387767255306244, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18192259967327118, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10942984372377396, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09955676645040512, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12889015674591064, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11903754621744156, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11317775398492813, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09486057609319687, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08945929259061813, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06557049602270126, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05692870914936066, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05258693918585777, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05152161791920662, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.032871175557374954, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027314990758895874, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.027059702202677727, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.024409061297774315, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.023726943880319595, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01749829761683941, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017371198162436485, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016122305765748024, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.0116761215031147, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11979558318853378, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1123868077993393, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10949555039405823, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09938529133796692, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.056106746196746826, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05320880189538002, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06351921707391739, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05875104293227196, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0569244846701622, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.050529588013887405, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04805750027298927, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03223948925733566, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02804790623486042, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026861194521188736, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0265762098133564, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016107972711324692, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01376043539494276, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013645228929817677, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012623843736946583, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012444324791431427, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008400657214224339, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008312035351991653, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008015165105462074, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00534428795799613, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10199293494224548, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09569980204105377, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09303981810808182, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08446786552667618, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.047767214477062225, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0451820008456707, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.054528914391994476, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05032796412706375, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.048445068299770355, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04305269941687584, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04100501537322998, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.027653761208057404, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02403605356812477, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.022868234664201736, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.022586531937122345, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013824524357914925, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011701715178787708, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.011578639969229698, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010738503187894821, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010558347217738628, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007180592976510525, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00707276351749897, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0068038334138691425, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.00449906662106514, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.25160813331604004, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.23652032017707825, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.23164814710617065, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21062657237052917, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11832946538925171, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.1127842366695404, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13214388489723206, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1219405084848404, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11992349475622177, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10667552053928375, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10136952251195908, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06709516793489456, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05818570405244827, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05653051659464836, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0561368502676487, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03348800912499428, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028709203004837036, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02858526073396206, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.026285378262400627, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026040932163596153, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017201755195856094, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01677006110548973, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016580013558268547, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01030676905065775, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.20096632838249207, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17582200467586517, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16590528190135956, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.141113743185997, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09344898164272308, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08284977823495865, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11063489317893982, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1018000990152359, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09633560478687286, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0756748616695404, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06985463947057724, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05697596073150635, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.049089740961790085, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04527677223086357, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04434237256646156, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.02861039713025093, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.024029510095715523, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02367543615400791, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02058926224708557, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.019975410774350166, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015515776351094246, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015965163707733154, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014311308041214943, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011529705487191677, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.199577197432518, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.18774853646755219, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.18414157629013062, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.16762666404247284, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09397820383310318, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0897684395313263, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10417073965072632, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.0964132770895958, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.0952407494187355, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08482255041599274, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08073460310697556, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.053022921085357666, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04614771530032158, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.045042309910058975, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04477594047784805, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.026523662731051445, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.023265913128852844, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.023193171247839928, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.021413417533040047, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.021248430013656616, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013972637243568897, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014162091538310051, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013615482486784458, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009565839543938637, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24538277089595795, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23100757598876953, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22659087181091309, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20626279711723328, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.1154494658112526, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11035458743572235, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.127935528755188, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11841152608394623, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11700502038002014, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.1042696163058281, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0991450622677803, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06504865735769272, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.056577417999506, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.0552278496325016, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0549030639231205, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03251237794756889, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02818620391190052, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02810150384902954, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.025880100205540657, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025675954297184944, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016940465196967125, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016647284850478172, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016499701887369156, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010591969825327396, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.25.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.24281175434589386, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2175312340259552, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20732074975967407, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18475133180618286, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11112574487924576, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10119154304265976, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13087935745716095, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12068457901477814, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11485344171524048, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09624674916267395, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09065821766853333, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06637074053287506, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05759069323539734, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05331164598464966, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.052257392555475235, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03319728374481201, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027560316026210785, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02730303443968296, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02458268217742443, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02390451356768608, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01750321127474308, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01733461394906044, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01610678993165493, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01141607016324997, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11849037557840347, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11120142042636871, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10825417190790176, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09829375147819519, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.055547211319208145, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.052622225135564804, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06314487755298615, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05822153761982918, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05634337663650513, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.050014473497867584, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04768258333206177, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.032055385410785675, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027819447219371796, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02659379504621029, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02630695514380932, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.016024550423026085, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01361385639756918, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01349849347025156, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012481722049415112, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012296229600906372, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00834717322140932, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008213603869080544, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007947218604385853, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0052534183487296104, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10283558815717697, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09651070833206177, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09380082786083221, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08519261330366135, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04815813526511192, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04554217308759689, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.055216141045093536, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.050748806446790695, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.048839833587408066, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04338005557656288, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04145841673016548, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.027979830279946327, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.024226102977991104, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.023063912987709045, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02278617024421692, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013981332071125507, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011804293841123581, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01168404147028923, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010825186036527157, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.010648001916706562, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007293461821973324, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007136398460716009, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00691986083984375, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004546015989035368, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.24966034293174744, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.234743133187294, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22982831299304962, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.2088402360677719, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11755895614624023, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11201856285333633, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13140179216861725, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12114804238080978, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11916925013065338, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10587987303733826, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10068711638450623, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06680998206138611, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05785321444272995, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05620726943016052, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05581337958574295, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03334314003586769, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028643636032938957, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028517447412014008, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02623106725513935, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025980448350310326, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017214585095643997, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016892440617084503, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01658514328300953, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010606455616652966, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2028915137052536, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.16791929304599762, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.15574824810028076, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.13470414280891418, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09290424734354019, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07889285683631897, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11089175939559937, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10132083296775818, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09737423062324524, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07237733900547028, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06738336384296417, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.057065773755311966, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.049512386322021484, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04571293666958809, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04479687288403511, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.028922071680426598, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.025327706709504128, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02507922798395157, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.021639201790094376, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021040048450231552, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016291331499814987, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.018039051443338394, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015124400146305561, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.014312369748950005, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.19989024102687836, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1880481094121933, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.18445222079753876, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.16779470443725586, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09415972232818604, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08991769701242447, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10434508323669434, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09662099927663803, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09542684257030487, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08496950566768646, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08073681592941284, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05314214900135994, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04622476547956467, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.045109301805496216, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04483655095100403, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02655106969177723, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.023247530683875084, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.023172006011009216, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.021385198459029198, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.021216973662376404, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01393888983875513, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014080190099775791, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013573835603892803, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009416344575583935, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24511727690696716, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2307104915380478, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.2262498140335083, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20592480897903442, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11533050984144211, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11017964035272598, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1277938187122345, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11832772195339203, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11690376698970795, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.1041274443268776, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09896676987409592, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0649101659655571, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05650092661380768, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.055138133466243744, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05482657626271248, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03244037553668022, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028072237968444824, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02798427827656269, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02574673853814602, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02554204873740673, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016842400655150414, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01647125743329525, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01639297604560852, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010319806635379791, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.26.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.23972009122371674, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21518060564994812, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.20518989861011505, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1829683929681778, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10991344600915909, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10014871507883072, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12958359718322754, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11947224289178848, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11356805264949799, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09543837606906891, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09003150463104248, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06599839776754379, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05722091719508171, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05287611484527588, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05180048942565918, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03321342170238495, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027557186782360077, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.027285318821668625, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.024688366800546646, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02400495484471321, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017774591222405434, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.017644591629505157, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016353566199541092, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012010023929178715, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12748265266418457, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11964880675077438, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11666391789913177, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10593046247959137, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.059800900518894196, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05672864243388176, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06738550215959549, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.062390413135290146, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.060658153146505356, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05382262542843819, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05109862983226776, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03415261209011078, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.029771817848086357, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.028607772663235664, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.028326671570539474, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017054855823516846, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014599463902413845, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01449219137430191, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013369296677410603, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01319136843085289, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00883288774639368, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008716036565601826, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008447027765214443, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005492148920893669, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10890508443117142, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10217563807964325, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09951334446668625, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09039275348186493, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.051104653626680374, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04841001331806183, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.058007072657346725, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.053529661148786545, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05183103308081627, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04601575806736946, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04383714497089386, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.029442690312862396, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02554829977452755, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02446751855313778, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.024200471118092537, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014712228439748287, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012497198767960072, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012391221709549427, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011453949846327305, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01128994207829237, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00763555895537138, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007492855191230774, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007272362243384123, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004742878954857588, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2526431977748871, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.23735766112804413, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.232477068901062, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.2113732099533081, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1188751608133316, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11332672834396362, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1325748860836029, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12237710505723953, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12055447697639465, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10710744559764862, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10179416835308075, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06741326302289963, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05842730030417442, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05683448538184166, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.056465376168489456, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.033621128648519516, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028880473226308823, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028755083680152893, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.026429587975144386, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026180757209658623, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017305543646216393, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016895972192287445, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016702674329280853, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010426416993141174, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.19987237453460693, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17629262804985046, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.16863790154457092, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14464865624904633, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09234406799077988, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08350531756877899, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10630945861339569, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09812872111797333, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09568297117948532, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07679387927055359, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06952553987503052, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05448026582598686, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04707302153110504, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0443890281021595, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04373795911669731, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.027239544317126274, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02299683913588524, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02282647043466568, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01986573077738285, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.019431743770837784, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01439949031919241, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.014367777854204178, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013521386310458183, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009640596807003021, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.20298375189304352, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1908923238515854, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1871577650308609, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1702824831008911, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09565584361553192, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0913313701748848, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10612762719392776, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09818193316459656, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.0969686359167099, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08625990152359009, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08199115842580795, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05406991392374039, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04698273539543152, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04584190994501114, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.045562297105789185, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02704477868974209, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02359144762158394, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.023519620299339294, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02168734185397625, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.021517416462302208, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014252232387661934, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014242542907595634, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013880070298910141, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00946838315576315, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24695564806461334, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23225319385528564, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22780941426753998, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.20724010467529297, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11620615422725677, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11097156256437302, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1287495493888855, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11922429502010345, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11780150234699249, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10483825951814651, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09958246350288391, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0654364749789238, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05691879615187645, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05554644390940666, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.0552239865064621, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03265444561839104, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028245914727449417, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02815597876906395, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.025885794311761856, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02568020112812519, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01688128337264061, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01651669666171074, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016428256407380104, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010252827778458595, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.27.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.24477717280387878, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2202966958284378, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21042612195014954, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18768410384655, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11229021847248077, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10262154787778854, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13181595504283905, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12155797332525253, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11584677547216415, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09757168591022491, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09185558557510376, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.0667356625199318, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.05797354131937027, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05380070209503174, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05277978256344795, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.033397261053323746, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.0276675783097744, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.027409320697188377, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02469136379659176, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02402646839618683, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017535382881760597, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01716724783182144, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016178086400032043, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.011028707958757877, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12666215002536774, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11876422166824341, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11570997536182404, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1049642488360405, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05944836512207985, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.056310418993234634, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06728110462427139, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06219976395368576, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06032039597630501, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0534658208489418, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05082053691148758, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03416632488369942, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.029739094898104668, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.028476836159825325, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.028169013559818268, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017091499641537666, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014603237621486187, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014477218501269817, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013373952358961105, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013178699649870396, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00892319530248642, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008832253515720367, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00850633718073368, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005690694320946932, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10836118459701538, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10158360749483109, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09882984310388565, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08968602120876312, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.050836045295000076, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04810106381773949, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.057882048189640045, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05338345468044281, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05157015845179558, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04571394622325897, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04350975528359413, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.029366055503487587, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.025485238060355186, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.024333739653229713, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0240543931722641, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01467034313827753, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012440647929906845, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012325973249971867, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011392304673790932, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011215057224035263, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.0076165408827364445, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007483563851565123, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0072324215434491634, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0047456431202590466, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2514009177684784, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.23599034547805786, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.23105622828006744, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.2098124474287033, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11827705800533295, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11263896524906158, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1321202963590622, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12190685421228409, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11991143226623535, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10640434920787811, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.1010717898607254, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06711313873529434, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.058164648711681366, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05650682747364044, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05611128732562065, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.033487264066934586, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02868156135082245, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028551749885082245, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.026219462975859642, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025961177423596382, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017187796533107758, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01674484647810459, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016561569646000862, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010246881283819675, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1943625658750534, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1506865918636322, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.13723081350326538, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11755585670471191, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.08479787409305573, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06926219165325165, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1060994565486908, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0952916070818901, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09068591147661209, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0659700334072113, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0606250986456871, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05276983603835106, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04735632985830307, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04268714040517807, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04147697612643242, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.027217580005526543, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.025050049647688866, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.024800878018140793, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02191055938601494, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021245284005999565, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015895232558250427, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.019273314625024796, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0145676014944911, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.016294661909341812, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2079804688692093, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1956048458814621, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1917474865913391, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17434662580490112, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09808199107646942, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09359433501958847, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10883169621229172, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10071263462305069, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09944100677967072, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08841572701931, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08396903425455093, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05546768754720688, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.048210762441158295, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04702017828822136, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04674745351076126, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02781057171523571, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.0242520309984684, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.024172769859433174, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02229386754333973, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.022115541622042656, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014768279157578945, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014704273082315922, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014385020360350609, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00985436886548996, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.25079047679901123, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23584356904029846, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23128651082515717, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.21040096879005432, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11808957904577255, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.1127396672964096, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.131145641207695, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12118381261825562, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11971617490053177, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10647844523191452, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10118754208087921, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06663491576910019, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05792643874883652, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.056510720402002335, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05618513375520706, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03338385373353958, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028845660388469696, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02875283733010292, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026447303593158722, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026236774399876595, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01752403751015663, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017033889889717102, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.017064349725842476, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010815559886395931, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.28.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.24625268578529358, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2220795601606369, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21204830706119537, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18918627500534058, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11318278312683105, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10350459069013596, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13293921947479248, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12285410612821579, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11673922091722488, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09864889830350876, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09292148798704147, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06760183721780777, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.058809563517570496, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05440801382064819, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.053339824080467224, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03388137370347977, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.028324317187070847, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.028039567172527313, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.025437377393245697, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.024749403819441795, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.01789109781384468, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018064741045236588, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01644572615623474, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012229109182953835, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13084974884986877, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12270914763212204, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.119777612388134, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10873917490243912, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06149926781654358, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.058348409831523895, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06922109425067902, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06403786689043045, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06239209324121475, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05530485510826111, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05250798910856247, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.035088203847408295, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.030589837580919266, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02941271662712097, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02913738414645195, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01752319000661373, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014994891360402107, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014886156655848026, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013710997998714447, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013535091653466225, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009051459841430187, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008909937925636768, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008664616383612156, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005573638714849949, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11359666287899017, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10650058835744858, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10381221026182175, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09427695721387863, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05335509032011032, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05058140307664871, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06037372723221779, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05574426427483559, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05412779003381729, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04800195246934891, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04567074403166771, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.030636850744485855, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.026616130024194717, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025524983182549477, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.025269897654652596, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015309321694076061, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013025493361055851, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012922474183142185, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011924149468541145, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011758377775549889, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007934274151921272, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007775942794978619, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007572330068796873, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0048827012069523335, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2577415704727173, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2421073168516159, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.23712286353111267, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.2154557704925537, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12143391370773315, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11574066430330276, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1354135423898697, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12502674758434296, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12314914911985397, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10932924598455429, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10380934178829193, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06879691034555435, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05968339368700981, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0580337718129158, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.057639531791210175, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.034313321113586426, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.029446519911289215, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029322683811187744, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02692328579723835, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026667771860957146, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017623422667384148, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01715988479554653, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0170120969414711, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010478965006768703, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2031782567501068, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1680861860513687, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.15617132186889648, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1334357112646103, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09205396473407745, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07792104780673981, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11008710414171219, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10105982422828674, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09744534641504288, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07179703563451767, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06757616996765137, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05667370930314064, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04866278916597366, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04454103112220764, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04351368919014931, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.028497405350208282, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.0234579686075449, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.023229777812957764, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.019501175731420517, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.018793538212776184, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015578026883304119, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015417440794408321, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014296519570052624, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010822799988090992, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.21170441806316376, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1990063637495041, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1950419694185257, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17736943066120148, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09987270832061768, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09529686719179153, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11097750067710876, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10253150016069412, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10123282670974731, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08997160941362381, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08552505820989609, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05656431242823601, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.049086809158325195, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04787709563970566, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.047594860196113586, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.028310254216194153, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02469663880765438, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02461407147347927, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.022690752521157265, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02251007780432701, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014995159581303596, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014984720386564732, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01460142433643341, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010058312676846981, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2534356415271759, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23825353384017944, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23356813192367554, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.21236930787563324, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11943808943033218, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.1139635369181633, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1323716789484024, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12257934361696243, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.1210736483335495, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10759592801332474, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10215184092521667, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06733891367912292, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05855652689933777, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05711575597524643, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05677798017859459, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.033623188734054565, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.029077202081680298, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.028981870040297508, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02662835456430912, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026407286524772644, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017439892515540123, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01706508733332157, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01696515642106533, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010676237754523754, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.29.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2532232105731964, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.22831973433494568, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21815481781959534, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.19448795914649963, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11639228463172913, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10653427243232727, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1370869129896164, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12608851492404938, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.12003709375858307, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10135290026664734, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09540682286024094, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06974261999130249, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.060278136283159256, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05589216947555542, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.054818570613861084, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03516357019543648, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.028952892869710922, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02867296151816845, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.025944674387574196, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.025252392515540123, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018951907753944397, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018269754946231842, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017522791400551796, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01214493066072464, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.136132150888443, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12764854729175568, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12458611279726028, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1130184456706047, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06397570669651031, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06070337072014809, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07215560972690582, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06664709746837616, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06488897651433945, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05752728134393692, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05465823784470558, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03666716814041138, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03183097764849663, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.030619297176599503, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.030331861227750778, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01832037977874279, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01563471183180809, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015524487011134624, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014303618110716343, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014118624851107597, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009523184970021248, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00933550763875246, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009105676785111427, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005900211166590452, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11675966531038284, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10949157923460007, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10673538595438004, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09686700254678726, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05489542335271835, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05204020068049431, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06219958886504173, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.057354193180799484, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0556924007833004, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.049377065151929855, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04699034243822098, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.031612396240234375, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027410665526986122, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026292117312550545, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.026023371145129204, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01579015888273716, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013425670564174652, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013321899808943272, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012290083803236485, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01211788970977068, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008204842917621136, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008033962920308113, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007833418436348438, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.0050763715989887714, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2591395080089569, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2432371973991394, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.2382611632347107, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21626082062721252, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12209407240152359, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11631924659013748, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13603810966014862, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12566760182380676, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1238260269165039, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.1098058819770813, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10417710989713669, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06911537051200867, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05996653437614441, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0583471842110157, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05795595049858093, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03447785973548889, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.029631122946739197, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.02951708436012268, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.027074065059423447, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.026823539286851883, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017732495442032814, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017311887815594673, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017132679000496864, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010655400343239307, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2174527794122696, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18858250975608826, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.17727138102054596, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14962483942508698, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09957437962293625, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.08890685439109802, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12003128975629807, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1101987361907959, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1043248400092125, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.08077218383550644, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07537151128053665, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.061746347695589066, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05308549106121063, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.0482531413435936, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04705981910228729, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03097590245306492, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.025572434067726135, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.025194920599460602, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.021939558908343315, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02114368975162506, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01682322472333908, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017068706452846527, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.015308080241084099, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01219302136451006, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2103559374809265, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.19765713810920715, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19367891550064087, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17612725496292114, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09927234798669815, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09469394385814667, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1101759672164917, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10195481777191162, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10065415501594543, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08938495814800262, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08487297594547272, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05617167428135872, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04880928248167038, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04758691042661667, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04729285463690758, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.028073862195014954, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02450091950595379, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02442241460084915, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02249065972864628, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.02230989560484886, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01476175244897604, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014811735600233078, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014355460181832314, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009856942109763622, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.254763662815094, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23942260444164276, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23464934527873993, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.21329598128795624, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.12011612206697464, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11461685597896576, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1333557367324829, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12332327663898468, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12181245535612106, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10816860944032669, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10264686495065689, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06789935380220413, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05896289646625519, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05749703571200371, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05715109780430794, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.033913664519786835, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.029367852956056595, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.029271434992551804, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026901084929704666, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026679174974560738, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01768912374973297, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017378900200128555, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01720542460680008, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011084838770329952, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.30.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.25505512952804565, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.23012028634548187, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21988709270954132, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1959536373615265, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11732280254364014, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10741177946329117, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13752011954784393, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.1271515190601349, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.12093127518892288, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10217521339654922, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09609977900981903, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06990396976470947, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06079890578985214, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.056354865431785583, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.055271945893764496, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03503791242837906, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029216939583420753, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.028924236074090004, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.026186998933553696, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.025486597791314125, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018549390137195587, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018466724082827568, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01712249405682087, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012311926111578941, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13232257962226868, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12402638792991638, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12089382857084274, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10965607315301895, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.062204182147979736, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.058964118361473083, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07014185935258865, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06491924822330475, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06311769783496857, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05588831380009651, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05300923064351082, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0355982780456543, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.031004084274172783, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.029785271733999252, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.029490219429135323, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017784181982278824, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015203178860247135, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01508997194468975, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01389516144990921, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013711637817323208, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009228807874023914, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009077867493033409, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008825311437249184, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005723589565604925, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11407501995563507, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1069437712430954, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10414069145917892, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09443952143192291, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.053622208535671234, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05077976733446121, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.060738012194633484, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05608407407999039, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05441741645336151, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04817865043878555, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04579305648803711, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03086272068321705, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.026795992627739906, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025679511949419975, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.02541567198932171, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01541835255920887, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013128578662872314, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013020469807088375, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012006026692688465, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011835142970085144, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00800511334091425, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007874961011111736, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.0076408302411437035, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004998599644750357, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2596847712993622, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.24359844624996185, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.23853985965251923, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.2164570391178131, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1223272830247879, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11650564521551132, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13636910915374756, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12591968476772308, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12409801036119461, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10995122045278549, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10431160032749176, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06925662606954575, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.060091715306043625, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05846846476197243, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05807504057884216, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.034542158246040344, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.029665639623999596, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029550794512033463, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.0270853191614151, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02683127485215664, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01773522049188614, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01728915236890316, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017134519293904305, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010581817477941513, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.19901162385940552, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.16283638775348663, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.14700935781002045, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12570002675056458, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09082995355129242, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07430332154035568, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11384785920381546, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10430596768856049, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09535456448793411, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06959289312362671, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06561243534088135, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.058699168264865875, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.050174642354249954, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04401467368006706, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04246091842651367, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.029389947652816772, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.023304276168346405, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.022703178226947784, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.019198909401893616, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01812026835978031, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.015773853287100792, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.015796637162566185, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013729768805205822, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011023157276213169, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2095573991537094, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.19678525626659393, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19285599887371063, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17516715824604034, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09894123673439026, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09433750808238983, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10993891954421997, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10163199156522751, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10031019896268845, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08899203687906265, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08448982238769531, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05606379359960556, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04868200048804283, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.047449223697185516, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04716625064611435, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.028068380430340767, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02449939213693142, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02441907674074173, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.022487033158540726, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.022305632010102272, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014880599454045296, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01490034069865942, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01447267085313797, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01003083772957325, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2565513253211975, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.24101808667182922, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23625802993774414, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2146483212709427, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.12105518579483032, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11542928218841553, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1342952698469162, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12426777929067612, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12272477895021439, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10888507962226868, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10327115654945374, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.0683383047580719, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05938885360956192, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05790058150887489, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05754714459180832, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03411651402711868, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.029498301446437836, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.029399197548627853, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026985114440321922, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026759551838040352, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01770545355975628, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01734808459877968, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.017219340428709984, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010876976884901524, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.31.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.26072901487350464, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2353825569152832, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.22510558366775513, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.2006818950176239, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.12003177404403687, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10995246469974518, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.14162932336330414, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12978026270866394, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.12370967864990234, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10457675904035568, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09836356341838837, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07167062163352966, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.062086693942546844, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05767887830734253, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05660388991236687, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.0360625758767128, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029934819787740707, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.029658759012818336, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02684491127729416, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.026156628504395485, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.019308213144540787, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018928853794932365, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017867963761091232, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012692793272435665, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.32.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13541413843631744, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12682682275772095, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12362945079803467, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11204040050506592, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0636674091219902, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06029517948627472, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07185181975364685, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06644172221422195, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06460403650999069, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.057109490036964417, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.054150499403476715, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.036433376371860504, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03171638399362564, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.030465412884950638, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.03017016500234604, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018204139545559883, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015569659881293774, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015457039698958397, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014226303435862064, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01403014175593853, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009439246729016304, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009319625794887543, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00901547260582447, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005906469188630581, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.32.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1153089851140976, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10805089771747589, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10514365136623383, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0953557938337326, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05422079935669899, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05129969120025635, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0615672692656517, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05679726600646973, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.055020663887262344, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.048676006495952606, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.046279020607471466, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.031274523586034775, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.027133431285619736, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025971991941332817, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.025686921551823616, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01562507078051567, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013276188634335995, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01316142175346613, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012133308686316013, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011952671222388744, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008119288831949234, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007971002720296383, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007729723118245602, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005054909270256758, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.32.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.26440516114234924, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.24797020852565765, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.24269700050354004, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.22018536925315857, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12461893260478973, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11864946782588959, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13892844319343567, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12834377586841583, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1264660805463791, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.11188385635614395, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10609425604343414, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07060150802135468, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06127284839749336, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05957484990358353, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05917828902602196, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03522151708602905, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.030250001698732376, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.030129142105579376, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.027588054537773132, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.027328917756676674, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018102651461958885, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017648929730057716, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017479784786701202, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010837160982191563, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.32.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2095007300376892, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.17122645676136017, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1564401388168335, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12478030472993851, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09348037838935852, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.079010508954525, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.11663311719894409, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10671591758728027, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10027491301298141, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07015027105808258, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06603177636861801, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05985737219452858, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.0512627437710762, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.045210130512714386, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04367975518107414, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.030006567016243935, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.023894052952528, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.023452039808034897, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.019402114674448967, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01831582933664322, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016138028353452682, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016096891835331917, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.014168244786560535, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.011190952733159065, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.32.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.20745986700057983, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.19480222463607788, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19083787500858307, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1732437014579773, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0979962944984436, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.093401700258255, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10882236063480377, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1006927341222763, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.0993746891617775, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08811537176370621, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08364763855934143, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05550452694296837, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.048244018107652664, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04701557010412216, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04672791808843613, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02776426263153553, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.024290909990668297, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.024207595735788345, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.022289050742983818, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.022107068449258804, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014645281247794628, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014798355288803577, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014243979938328266, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009995859116315842, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.32.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2550736963748932, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.2395395040512085, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23472094535827637, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.21320663392543793, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.12034447491168976, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.1147565171122551, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13349005579948425, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.1236124187707901, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12205610424280167, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10824534296989441, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10269687324762344, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06797496229410172, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05908476933836937, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.057602908462285995, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05725500360131264, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03394431993365288, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.029357081279158592, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.029263921082019806, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026848584413528442, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026626039296388626, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017623361200094223, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01728668436408043, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01713225431740284, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01088620349764824, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.32.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.26012617349624634, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.2346065640449524, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.22440440952777863, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.20003336668014526, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.1197216734290123, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10966428369283676, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1403794139623642, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12940281629562378, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.1234595850110054, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10427825152873993, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09804525971412659, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07139299809932709, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06189090758562088, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.0574793703854084, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05641578510403633, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03597144037485123, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029770726338028908, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.029499707743525505, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.026680706068873405, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.0259881392121315, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.019339419901371002, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01874300092458725, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.017933350056409836, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.01245532650500536, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.33.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13168415427207947, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12331278622150421, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12008750438690186, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10873772203922272, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.061919115483760834, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0586172491312027, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.07001674175262451, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06475663185119629, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06286130100488663, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.055549945682287216, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0526956282556057, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03555737063288689, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.030938630923628807, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.029653582721948624, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.029346363618969917, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017768308520317078, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015156738460063934, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015033516101539135, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01383681409060955, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013640105724334717, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009222421795129776, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0090837636962533, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00879503320902586, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005758867133408785, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.33.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11394012719392776, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10669007897377014, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10387247800827026, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09405195713043213, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05357387661933899, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.050665561109781265, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06070152670145035, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05609336122870445, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.0543697252869606, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.048045769333839417, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04561421647667885, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03084930032491684, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.026814138516783714, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02566215954720974, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.025387901812791824, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01541582029312849, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01312052272260189, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013010201044380665, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011983487755060196, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011809976771473885, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008006495423614979, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.00788415689021349, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007628640625625849, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005008638836443424, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.33.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2603907287120819, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2441900074481964, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.23903203010559082, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21670188009738922, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12274906039237976, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11680512875318527, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13686807453632355, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12655861675739288, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12454201281070709, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.1102079525589943, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10442308336496353, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06953730434179306, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.06039360910654068, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05868398770689964, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05826462432742119, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.034686096012592316, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.029769491404294968, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029643641784787178, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.027155587449669838, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02688848227262497, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017794068902730942, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017356546595692635, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017176851630210876, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010580320842564106, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.33.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.21943122148513794, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.18150019645690918, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1687004417181015, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.14593714475631714, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10025665163993835, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0853971391916275, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1202753409743309, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.10911478102207184, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.10493911802768707, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0787273421883583, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.07258591055870056, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06147016957402229, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.052804820239543915, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04884110018610954, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04786667600274086, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.031143011525273323, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026298170909285545, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026049060747027397, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02223014086484909, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.021603228524327278, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017554331570863724, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01787933148443699, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016374874860048294, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.013451055623590946, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.33.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.20554634928703308, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.19285717606544495, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1889176219701767, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17140957713127136, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09710128605365753, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09253735840320587, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10802296549081802, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09981079399585724, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09848914295434952, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08725914359092712, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08275454491376877, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05516361445188522, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04784735292196274, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.0466122068464756, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04631911218166351, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.027613937854766846, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.024109510704874992, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02402893640100956, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02211596816778183, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.021937310695648193, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014678003266453743, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014739708974957466, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014282915741205215, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010008303448557854, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.33.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.25455978512763977, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23894602060317993, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.23413532972335815, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.21258071064949036, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.12021689862012863, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.11458435654640198, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.13370651006698608, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.12351427972316742, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.12194204330444336, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10805687308311462, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.10249725729227066, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06814473867416382, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05911310017108917, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05761108547449112, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05725933611392975, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03409000486135483, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.0294953566044569, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.029392188414931297, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02698661759495735, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.026759928092360497, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017957380041480064, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.017578808590769768, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.01746828481554985, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01136322133243084, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.33.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2597723603248596, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.23423127830028534, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.223247230052948, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.19898545742034912, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11955218762159348, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10912679135799408, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.14186343550682068, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.13046696782112122, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.12341132014989853, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.1041688323020935, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09800016134977341, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.07206329703330994, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06243571639060974, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.057478196918964386, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.056240878999233246, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.03641843423247337, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02984241582453251, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.029493745416402817, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.026775386184453964, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.025987323373556137, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.019748445600271225, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01900472678244114, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.018188610672950745, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012695507146418095, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.34.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13516458868980408, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12644802033901215, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.12306338548660278, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.11138923466205597, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06353019922971725, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.06011400371789932, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0718424916267395, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06648776680231094, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06450256705284119, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05692179873585701, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.05393465980887413, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03647640720009804, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03175530955195427, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.03043700009584427, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.0301189124584198, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018216341733932495, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015570355579257011, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.015443671494722366, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.014202754013240337, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.014000804163515568, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00946592167019844, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009356711059808731, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009031877852976322, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005953591782599688, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.34.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11419837176799774, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10683643072843552, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1038774698972702, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09401015937328339, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.053699783980846405, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05073722079396248, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06113097444176674, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05641280859708786, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.054516758769750595, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0481262132525444, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.045701764523983, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.031061261892318726, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.026976963505148888, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02574148029088974, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.025445085018873215, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015528259798884392, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013184604234993458, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01306464895606041, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01204423326998949, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011855666525661945, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008089764975011349, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007976135239005089, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007687400560826063, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005106122232973576, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.34.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.26411235332489014, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.24753636121749878, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.24226520955562592, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21941664814949036, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12455041706562042, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11849931627511978, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1388504058122635, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12833216786384583, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.1263791024684906, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.11168745160102844, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10578900575637817, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.07059042900800705, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.061272528022527695, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05955290049314499, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05913656949996948, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03520595654845238, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.030231034383177757, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.03010880947113037, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.027544382959604263, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.027285657823085785, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01808418333530426, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01764834299683571, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017449503764510155, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010820022784173489, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.34.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.19747459888458252, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1652660369873047, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.15358662605285645, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12735459208488464, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.08868446201086044, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07647951692342758, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10779409855604172, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09937417507171631, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09457520395517349, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07065124809741974, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06411535292863846, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.055111996829509735, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04764173924922943, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04272598400712013, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04149675369262695, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.027548929676413536, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.022263050079345703, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.021938960999250412, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01869775354862213, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.017855070531368256, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014556756243109703, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.014495461247861385, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01297159306704998, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009680037386715412, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.34.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2043793946504593, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.19171567261219025, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.18777935206890106, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1704285740852356, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09658575057983398, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09202301502227783, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10734723508358002, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09927905350923538, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09797313064336777, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08675782382488251, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08227305859327316, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05482366681098938, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04760999232530594, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04638521373271942, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04609953239560127, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.027427110821008682, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.024003678932785988, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.023924948647618294, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.022019995376467705, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.021835142746567726, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01453638169914484, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014685270376503468, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014141150750219822, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00998427253216505, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.34.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.24718190729618073, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.23190155625343323, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22717975080013275, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.2062525451183319, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11669284105300903, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.1111975610256195, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.1297423541545868, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11993761360645294, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11839506775140762, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10483919084072113, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09944786876440048, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06615731865167618, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05735905095934868, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05590495839715004, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05555945634841919, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.033071376383304596, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.028573689982295036, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.028471777215600014, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.026125650852918625, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025900810956954956, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.017341287806630135, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.016948232427239418, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016858527436852455, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.01084466278553009, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.34.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.2561282217502594, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.23017816245555878, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21907082200050354, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1951335370540619, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11764314770698547, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.10715638101100922, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1394953578710556, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12869909405708313, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.12170110642910004, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.10233297199010849, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09626330435276031, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.0709320530295372, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06157604604959488, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.0565776564180851, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05534624308347702, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.035543572157621384, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.029398875311017036, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.029062725603580475, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02634597197175026, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.02555694989860058, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018853483721613884, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018760576844215393, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01724906824529171, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012573231011629105, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.35.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13110341131687164, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.12266219407320023, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11942300200462341, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1081002801656723, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06165874004364014, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05831708386540413, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06965281814336777, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06450428813695908, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06260296702384949, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05520346015691757, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0522778257727623, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03535611927509308, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.030826039612293243, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.029541200026869774, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.029231315478682518, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017671959474682808, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.015111499466001987, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014990605413913727, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.013782291673123837, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013587597757577896, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.009182051755487919, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.009086496196687222, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008762063458561897, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005792796146124601, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.35.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11042314022779465, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10328268259763718, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10043651610612869, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09092091768980026, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05190024897456169, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0490376278758049, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05898204445838928, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05449383705854416, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05268680676817894, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04648616537451744, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04413202032446861, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02997337281703949, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.026036595925688744, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.024873798713088036, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.024592561647295952, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014982592314481735, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012742429971694946, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012628125958144665, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011637561954557896, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011459925211966038, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007813043892383575, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007701145950704813, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007432625163346529, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004945224151015282, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.35.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2632427215576172, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.24669869244098663, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.24145960807800293, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21878394484519958, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.1241854727268219, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11815236508846283, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13842180371284485, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12786050140857697, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12595948576927185, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.11133801192045212, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10545964539051056, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.0703728049993515, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.061097078025341034, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05938649922609329, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05898389592766762, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.035100724548101425, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.030145175755023956, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.030021285638213158, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.027454640716314316, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02719985507428646, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.018025442957878113, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017582930624485016, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.017408180981874466, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010777479037642479, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.35.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.19734732806682587, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1660013496875763, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1557064950466156, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.1306244283914566, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.09110426902770996, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07961858063936234, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.1063830778002739, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09786038100719452, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.09481409937143326, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.07070602476596832, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.06388978660106659, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05452024191617966, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.04701842740178108, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04393526166677475, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.043189045041799545, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.027289705350995064, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.02294577658176422, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.022746972739696503, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01892668940126896, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.018396684899926186, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014572867192327976, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.014667732641100883, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.013582204468548298, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010106686502695084, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.35.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.2016022503376007, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.18906603753566742, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1851857751607895, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.16803409159183502, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09528957307338715, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09075025469064713, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10580506920814514, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09797839820384979, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09668318182229996, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08554114401340485, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08107715100049973, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05396777018904686, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04695143550634384, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04573237895965576, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04544303938746452, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.026993876323103905, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02361535280942917, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.023530224338173866, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.021636106073856354, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.021457428112626076, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.014229673892259598, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014375006780028343, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013828830793499947, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009685808792710304, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.35.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.23959723114967346, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.22481666505336761, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.22023025155067444, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.19986990094184875, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.11316784471273422, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.10781531780958176, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.12564173340797424, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.11627011746168137, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.11478439718484879, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.10163091868162155, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.096248097717762, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06401818245649338, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.05558812618255615, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05416642874479294, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05382869020104408, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.03195201978087425, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.027630599215626717, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.027533458545804024, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.02524069882929325, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.025025874376296997, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.016611743718385696, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.0163034088909626, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.016136737540364265, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010303996503353119, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.35.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.25217458605766296, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.22467397153377533, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.21312101185321808, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18981009721755981, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11515329033136368, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.104237399995327, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.13761214911937714, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12690041959285736, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11979936808347702, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09989403188228607, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.09408599883317947, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.0699319839477539, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.06064479053020477, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05531713366508484, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.054000869393348694, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.034982532262802124, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.02862892858684063, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02829849347472191, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.025569042190909386, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.024719495326280594, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018399232998490334, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018192609772086143, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016651254147291183, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.011928923428058624, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.36.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.12784267961978912, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11945996433496475, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11613459885120392, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.10496499389410019, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06008951738476753, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.056688591837882996, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0681576356291771, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06312547624111176, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06099681556224823, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0537358783185482, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.050886258482933044, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.034613799303770065, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.030175581574440002, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.028789455071091652, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.028456563130021095, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.017318975180387497, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014739848673343658, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014602649956941605, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01343502290546894, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013220832683146, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00901227630674839, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008900233544409275, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.008566712960600853, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005677599459886551, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.36.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10514489561319351, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09823814034461975, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09533639997243881, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08617959171533585, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04936042055487633, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04651379585266113, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05634670332074165, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05209485813975334, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05009927600622177, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04417288675904274, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0418381430208683, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.028603259474039078, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.024893932044506073, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.02365775778889656, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.023358268663287163, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014294414781033993, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012127349153161049, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012000533752143383, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011064873076975346, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01087234728038311, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007450019475072622, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007364306598901749, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007052851840853691, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004719694145023823, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.36.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2570268213748932, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2408076524734497, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.23561377823352814, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.21335473656654358, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.12123703211545944, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11528260260820389, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13541580736637115, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12498432397842407, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.12299967557191849, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10863632708787918, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.10285375267267227, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06886698305606842, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05970282107591629, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05800606682896614, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.057590026408433914, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.03436225280165672, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.029481489211320877, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.029363801702857018, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.026864761486649513, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02659902535378933, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017687326297163963, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.017292017117142677, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01705002784729004, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010701093822717667, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.36.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.18835796415805817, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1560121774673462, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.14510542154312134, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.12068279832601547, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.08549892157316208, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.07368535548448563, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.10212992876768112, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.09376126527786255, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.090646892786026, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.06668126583099365, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.059756796807050705, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.05251790210604668, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.045180194079875946, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.04144049063324928, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.04050908982753754, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.026379667222499847, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.021971948444843292, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.021779818460345268, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.018356679007411003, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.017729459330439568, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.014301944524049759, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.014633125625550747, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.01310957595705986, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010532282292842865, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.36.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.19362309575080872, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.18148507177829742, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.17771920561790466, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.16130191087722778, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09153327345848083, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.08713432401418686, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10168778151273727, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.09410952031612396, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.09284387528896332, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.08215846866369247, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.0778067484498024, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05190417170524597, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04512642323970795, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04394302889704704, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.043665360659360886, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.025973889976739883, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02272867225110531, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.022647207602858543, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.020835168659687042, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.020657144486904144, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013735697604715824, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013904737308621407, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.013348028063774109, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009432231076061726, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.36.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.22559157013893127, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.21158264577388763, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.20725369453430176, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.18804985284805298, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.10655543208122253, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.1014925017952919, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.11832650005817413, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10952583700418472, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.10810887068510056, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0956738293170929, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.09062501788139343, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.06030392274260521, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0523836612701416, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.05103788897395134, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.05072448030114174, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.030112147331237793, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02607687935233116, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.025982322171330452, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.023822292685508728, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.023616084828972816, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01567680388689041, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01544965710490942, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.015232346951961517, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00984707847237587, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.36.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.24142225086688995, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.21431997418403625, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.2029113620519638, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.18053452670574188, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.11035813391208649, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.0994817316532135, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.1328958123922348, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.12182880938053131, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.11487697064876556, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.09531747549772263, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08984656631946564, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06763362139463425, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.0584229938685894, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.05324764922261238, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.05197793245315552, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.0340263694524765, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.027982838451862335, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.027659334242343903, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.02503778412938118, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.024221105501055717, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.018372004851698875, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.018341409042477608, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.016688859090209007, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012748467735946178, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.37.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.1185387670993805, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11067822575569153, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.1074851006269455, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09709686785936356, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.05565210059285164, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.0524623766541481, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06349504739046097, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05862328037619591, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05653103440999985, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04976103454828262, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.047206293791532516, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.032276857644319534, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02804500050842762, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.026708725839853287, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.026384716853499413, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01616096869111061, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.013749189674854279, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013616415672004223, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012545520439743996, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012341064400970936, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.00847513135522604, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008418870158493519, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.00804627500474453, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005525330547243357, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.37.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10061874240636826, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.09398555755615234, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.0910782590508461, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0822758674621582, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04722151905298233, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.044409964233636856, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.054176539182662964, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.050033826380968094, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.047957643866539, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04223070666193962, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.04004606232047081, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.027527550235390663, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.023931754752993584, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.022656921297311783, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.022355003282427788, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013786016032099724, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011658367700874805, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01152582187205553, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.010645083151757717, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.01045035757124424, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007232473231852055, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0071598440408706665, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006816981825977564, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004676498007029295, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.37.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2460603415966034, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.2302311509847641, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22509615123271942, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20348717272281647, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11601191014051437, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11012273281812668, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12971846759319305, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.11977189779281616, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11770977079868317, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.10379137843847275, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09821616858243942, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06597256660461426, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.057227812707424164, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05550220608711243, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05508633330464363, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.032928045839071274, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028246596455574036, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028113916516304016, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02571142092347145, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02544526755809784, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016983378678560257, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016619278118014336, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016356702893972397, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01033503282815218, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.37.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13384605944156647, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1166093721985817, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.11163719743490219, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.09547997266054153, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06205037981271744, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05550128594040871, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0713624581694603, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06499212235212326, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06374774873256683, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.05043996870517731, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.046254951506853104, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03664657846093178, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03206261247396469, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.030732430517673492, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.030426891520619392, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01865587756037712, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01732666604220867, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01726202480494976, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.015285352244973183, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.015093715861439705, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.01077950932085514, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.01247975043952465, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.010391682386398315, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010312804952263832, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.37.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.18309397995471954, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.17169003188610077, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.16811373829841614, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1524963080883026, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.08654360473155975, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0823839083313942, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.0963435098528862, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08900992572307587, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.08778442442417145, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0777006670832634, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.07365504652261734, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.049225643277168274, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04273492842912674, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04158424586057663, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.04131720960140228, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02468474581837654, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.02157745137810707, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.02150149643421173, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019796857610344887, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.01962623931467533, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.013183287344872952, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.01329604908823967, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012795496731996536, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009142391383647919, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.37.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.20847219228744507, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.19549816846847534, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.19144923985004425, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.17371177673339844, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.09848449379205704, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.09376313537359238, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.10956121981143951, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.10128608345985413, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.0999128520488739, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.0884074866771698, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.08373649418354034, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.05590038746595383, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.04851321503520012, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.04722420871257782, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.046926502138376236, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02794666588306427, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.024236951023340225, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.024151833727955818, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.022173603996634483, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.021979758515954018, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01468832977116108, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014539199881255627, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.014261039905250072, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.009504619985818863, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.37.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.22592802345752716, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.19877474009990692, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.18796569108963013, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.1672559231519699, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.10307568311691284, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.09240161627531052, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.12394128739833832, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.11355343461036682, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.10768673568964005, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.08841606229543686, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.08334144949913025, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.06339413672685623, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.054630693048238754, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.04988281801342964, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.04871970787644386, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.031987641006708145, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.026486745104193687, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.02624579705297947, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.023662224411964417, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.022930677980184555, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.017542310059070587, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.01767130196094513, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.01604858599603176, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.012715778313577175, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.38.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.11352521926164627, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.1061544343829155, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10313057154417038, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.0932389348745346, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.053363751620054245, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05036325007677078, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06104021519422531, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0562329925596714, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.05416075512766838, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0477946400642395, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0454072467982769, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.031051693484187126, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.02690398134291172, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.025616560131311417, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.025308050215244293, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015557490289211273, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01318275649100542, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.013054221868515015, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012046224437654018, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011845807544887066, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.008181431330740452, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.008068358525633812, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007770645432174206, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005274214316159487, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.38.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09503193199634552, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.08883699774742126, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.08612052351236343, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.07782503217458725, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.044622957706451416, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04200831055641174, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.0512506365776062, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.047322411090135574, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.045299917459487915, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03994329646229744, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03788129240274429, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.026031257584691048, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.022640112787485123, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.021411120891571045, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.021114366129040718, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.013039380311965942, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.011021759361028671, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.01089075580239296, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01007078681141138, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.009881776757538319, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006846134085208178, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.0067808255553245544, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.006449990440160036, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004429024178534746, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.38.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2479836642742157, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.23234923183918, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.22736366093158722, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.20572881400585175, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.11705643683671951, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.11133195459842682, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.13062737882137299, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.12059197574853897, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11875592917203903, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.1048755943775177, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.0992230474948883, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06646259874105453, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05764210224151611, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05601759999990463, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.055632174015045166, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.033144690096378326, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.028516199439764023, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.028392277657985687, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02597496658563614, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.025726431980729103, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.017070068046450615, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016766780987381935, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016475196927785873, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010458601638674736, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.38.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.13063473999500275, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.11168195307254791, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.10610683262348175, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08656424283981323, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.06090538948774338, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.05383401736617088, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.06990686804056168, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.06372947990894318, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.06262845546007156, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.0469084233045578, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.042115356773138046, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.03612828999757767, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.03127256780862808, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.030000751838088036, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.029687605798244476, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.018508287146687508, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.01667642779648304, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.0166182741522789, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.01416962593793869, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.013979023322463036, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.010787378065288067, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.011761671863496304, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.010398121550679207, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.009535341523587704, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.38.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.14960341155529022, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.14028476178646088, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.13736748695373535, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.1247427687048912, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.07105040550231934, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.06765742599964142, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.07911738753318787, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.07309958338737488, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.07204756140708923, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.06385733187198639, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.060647282749414444, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04084170609712601, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.035727690905332565, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.03482282534241676, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03459992632269859, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02066117525100708, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.01920296624302864, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.019142093136906624, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.01788194105029106, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.017756149172782898, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.011759285815060139, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.013308933936059475, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.011474189348518848, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.010744900442659855, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.38.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.16672064363956451, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.1563337743282318, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.15313705801963806, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.13900475203990936, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0792107880115509, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0754704475402832, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.08840392529964447, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.08146579563617706, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.0803138017654419, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.07120327651500702, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.06766022741794586, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.04557839781045914, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.0398256853222847, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.038819603621959686, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.038584478199481964, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.02312464825809002, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.021407539024949074, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.021345147863030434, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.019941207021474838, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.019801322370767593, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.01328415609896183, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.014840519987046719, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.012972913682460785, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.011972705833613873, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.38.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.16525423526763916, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.14326690137386322, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.13493795692920685, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.12033693492412567, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.07511942833662033, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.06675256043672562, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.09226508438587189, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.0830342024564743, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.0786702036857605, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.06412465125322342, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.06084855645895004, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.04736017808318138, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.04055551066994667, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.03695720061659813, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.036009591072797775, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.024395419284701347, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.020456157624721527, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.020247679203748703, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.018460653722286224, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.0178888700902462, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.014290394261479378, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.014648916199803352, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.013094610534608364, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.011511306278407574, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.39.self_attn.q_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.10800602287054062, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.10107780247926712, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.09812334179878235, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.08883083611726761, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.0507272370159626, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.04785768687725067, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.05854935571551323, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.05359258875250816, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.051470816135406494, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.04547441750764847, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.043404702097177505, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.02978326380252838, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.025645483285188675, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.024350138381123543, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.024041978642344475, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.014934425242245197, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.012541215866804123, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.012405247427523136, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.011471184901893139, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.011274944990873337, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.007868262007832527, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.007707140874117613, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.007457637693732977, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.005067578982561827, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.39.self_attn.k_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.08514058589935303, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.07966839522123337, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.07714642584323883, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.06977365165948868, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.03995204716920853, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03756125271320343, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.046160988509655, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.0425097681581974, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04053722321987152, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.035784896463155746, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03398264944553375, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.023476209491491318, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.020353706553578377, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.019175034016370773, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.018894871696829796, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.01176389679312706, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.009912027046084404, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.009785240516066551, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.00907160434871912, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.008893529884517193, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.006225741468369961, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.006167838349938393, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.005846599582582712, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.004096714314073324, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.39.self_attn.v_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.2300257533788681, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.21562589704990387, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.21099045872688293, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.19116143882274628, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.10851453244686127, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.10317575186491013, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.12287180870771408, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.1119401603937149, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.11008433997631073, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.09730895608663559, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.09284856170415878, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.06284888833761215, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.05379079282283783, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.05217640474438667, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.05175594612956047, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.0318499393761158, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.026889560744166374, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.026753481477499008, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.02460642158985138, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.02435220591723919, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.016699867323040962, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.016191760078072548, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.016107920557260513, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.01057566050440073, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.39.self_attn.o_proj", + "numel": 26214400, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.17529296875, + "total_bits": 57024000.0, + "err": 0.09423431754112244, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 62266880.00000001, + "err": 0.07506432384252548, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 68820480.0, + "err": 0.0690622627735138, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72529296875, + "total_bits": 71441920.0, + "err": 0.05915624648332596, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22529296875, + "total_bits": 84549120.0, + "err": 0.04210502654314041, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.72529296875, + "total_bits": 97656320.0, + "err": 0.03534337505698204, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 79464320.0, + "err": 0.055738016963005066, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 81927680.0, + "err": 0.046174306422472, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.17529296875, + "total_bits": 83238400.0, + "err": 0.04433400183916092, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.52529296875, + "total_bits": 92413440.0, + "err": 0.03222266212105751, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.662646484375, + "total_bits": 96014080.0, + "err": 0.03139904513955116, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 105678720.0, + "err": 0.027971237897872925, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 108142080.0, + "err": 0.024053720757365227, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22529296875, + "total_bits": 110763520.0, + "err": 0.022247226908802986, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.32529296875, + "total_bits": 113384960.00000001, + "err": 0.021816033869981766, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 131893120.0, + "err": 0.015242049470543861, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22529296875, + "total_bits": 136977920.0, + "err": 0.014339467510581017, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.32529296875, + "total_bits": 139599360.0, + "err": 0.014252826571464539, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.52529296875, + "total_bits": 144842240.0, + "err": 0.012908383272588253, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72529296875, + "total_bits": 150085120.0, + "err": 0.012682168744504452, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 158107520.0, + "err": 0.010025442577898502, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 160570880.0, + "err": 0.011952660046517849, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2313232421875, + "total_bits": 163350400.0, + "err": 0.009548316709697247, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 212999679.99999997, + "err": 0.010863317176699638, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.39.mlp.gate_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.1362408995628357, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.12766598165035248, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.1249508187174797, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.11334168910980225, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.0641794428229332, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.0610775426030159, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.07156562805175781, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.06606912612915039, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.06512312591075897, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.05760161206126213, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.05464325472712517, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.03650713339447975, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.031671009957790375, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.030815239995718002, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03061136230826378, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.018250247463583946, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.01595250517129898, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.015893977135419846, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.014624771662056446, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.014497150667011738, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.009639425203204155, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.00978434644639492, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.009357016533613205, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.006667650304734707, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.39.mlp.up_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1751085069444445, + "total_bits": 153951744.0, + "err": 0.13414205610752106, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751085069444446, + "total_bits": 168107520.0, + "err": 0.12567202746868134, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251085069444446, + "total_bits": 185802240.0, + "err": 0.12302359193563461, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7251085069444443, + "total_bits": 192880128.0, + "err": 0.11160669475793839, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2251085069444443, + "total_bits": 228269568.0, + "err": 0.06362894177436829, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7251085069444443, + "total_bits": 263659008.0, + "err": 0.060558587312698364, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031277126736111, + "total_bits": 214550400.0, + "err": 0.07105351239442825, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251085069444446, + "total_bits": 221191680.0, + "err": 0.06547696143388748, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1751085069444445, + "total_bits": 224730624.0, + "err": 0.06455463916063309, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5251085069444446, + "total_bits": 249503232.0, + "err": 0.057131893932819366, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6625542534722224, + "total_bits": 259231488.0, + "err": 0.05427277833223343, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031277126736111, + "total_bits": 285329280.0, + "err": 0.036612920463085175, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125108506944445, + "total_bits": 291970560.0, + "err": 0.03182215988636017, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.225108506944444, + "total_bits": 299048448.0, + "err": 0.030978377908468246, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.325108506944445, + "total_bits": 306126336.0, + "err": 0.03078528866171837, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031277126736111, + "total_bits": 356108160.0, + "err": 0.0184579249471426, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.225108506944444, + "total_bits": 369827328.0, + "err": 0.016761193051934242, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.325108506944445, + "total_bits": 376905216.0, + "err": 0.016708074137568474, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.525108506944444, + "total_bits": 391060992.0, + "err": 0.015527251176536083, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.725108506944444, + "total_bits": 405216768.0, + "err": 0.015415712259709835, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031277126736111, + "total_bits": 426887040.0, + "err": 0.010340968146920204, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125108506944445, + "total_bits": 433528320.0, + "err": 0.011251755990087986, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.231277126736111, + "total_bits": 441042816.0, + "err": 0.010084656998515129, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125108506944445, + "total_bits": 575086080.0, + "err": 0.00875562522560358, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + }, + { + "key": "model.layers.39.mlp.down_proj", + "numel": 70778880, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1715892650462965, + "total_bits": 153702656.0, + "err": 0.07473891228437424, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37529296875, + "total_bits": 168120576.0, + "err": 0.0636572390794754, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62529296875, + "total_bits": 185815296.0, + "err": 0.058870188891887665, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.727144820601852, + "total_bits": 193024256.0, + "err": 0.05233796685934067, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.227144820601852, + "total_bits": 228413696.0, + "err": 0.03393569588661194, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.736404079861111, + "total_bits": 264458495.99999997, + "err": 0.029355164617300034, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0313232421875, + "total_bits": 214553664.0, + "err": 0.043594613671302795, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12529296875, + "total_bits": 221204736.0, + "err": 0.03883286565542221, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1715892650462965, + "total_bits": 224481536.0, + "err": 0.035791803151369095, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.523441116898148, + "total_bits": 249385216.0, + "err": 0.02875148504972458, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6644983362268517, + "total_bits": 259369088.0, + "err": 0.02751135639846325, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0313232421875, + "total_bits": 285332544.0, + "err": 0.02247179113328457, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12529296875, + "total_bits": 291983616.0, + "err": 0.019387561827898026, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.227144820601852, + "total_bits": 299192576.0, + "err": 0.017190000042319298, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3289966724537035, + "total_bits": 306401536.0, + "err": 0.016637181863188744, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0313232421875, + "total_bits": 356111424.0, + "err": 0.01170820277184248, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.227144820601852, + "total_bits": 369971456.0, + "err": 0.010282129049301147, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.310478153935185, + "total_bits": 375869696.0, + "err": 0.010146130807697773, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.523441116898148, + "total_bits": 390942976.0, + "err": 0.009437567554414272, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.727144820601852, + "total_bits": 405360896.0, + "err": 0.009129125624895096, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0313232421875, + "total_bits": 426890304.0, + "err": 0.006943284533917904, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12529296875, + "total_bits": 433541376.0, + "err": 0.008124057203531265, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.235026945891204, + "total_bits": 441308224.0, + "err": 0.006233265157788992, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12529296875, + "total_bits": 575099136.0, + "err": 0.006950531154870987, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ] + } + ], + "last_module_idx": 82, + "base_perplexity": 7.057848112946599 +} \ No newline at end of file